text_detector 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -4
- data/examples/regexp.rb +15 -0
- data/examples/simple.rb +15 -0
- data/lib/text_detector/dictionary/file.rb +7 -8
- data/lib/text_detector/dictionary.rb +2 -1
- data/lib/text_detector/version.rb +1 -1
- data/text_detector.gemspec +2 -2
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 315dfa048ec3b6dd195976900bb573407cbfdfc9
|
4
|
+
data.tar.gz: 2441e251ec922dc7900907fab919a068f1dbd614
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e31c2fe0028629483e7dc6079c16314217ef15c471289b9ffedc8030c3ea807db09fc55905080eec18f196de2009f1b9615c0e7770e9e946bd802d4ea7b38d18
|
7
|
+
data.tar.gz: f1134bc00b6cd5261d566b545c0ea9b2c108f6c36c1b76c860f2ecab935f5e1d4aaf384db662d9deb65149b09781a50709e60bbdaa231bf608a0d37ace52a547
|
data/README.md
CHANGED
@@ -3,9 +3,7 @@
|
|
3
3
|
|
4
4
|
# TextDetector
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
TODO: Delete this and the text above, and describe your gem
|
6
|
+
This is an experimental implementation for detecting text from document.
|
9
7
|
|
10
8
|
## Installation
|
11
9
|
|
@@ -25,7 +23,23 @@ Or install it yourself as:
|
|
25
23
|
|
26
24
|
## Usage
|
27
25
|
|
28
|
-
|
26
|
+
See also `bin/benchmark` and `examples/`.
|
27
|
+
|
28
|
+
### Regexp
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
require 'text_detector'
|
32
|
+
regexp_detector = TextDetector.factory(:regexp, open('dictionary.txt'))
|
33
|
+
regexp_detector.detect('The detector detects NG word from this document. The dictionary has many NG words.')
|
34
|
+
```
|
35
|
+
|
36
|
+
### Like Boyer-Moore String Search Algorithm
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
require 'text_detector'
|
40
|
+
simple_detector = TextDetector.factory(:simple, open('dictionary.txt'))
|
41
|
+
simple_detector.detect('The detector detects NG word from this document. The dictionary has many NG words.')
|
42
|
+
```
|
29
43
|
|
30
44
|
## Development
|
31
45
|
|
data/examples/regexp.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
require 'text_detector'
|
5
|
+
|
6
|
+
dictionary = StringIO.new(<<EOF)
|
7
|
+
NG_A
|
8
|
+
NG_B
|
9
|
+
NG_C
|
10
|
+
EOF
|
11
|
+
|
12
|
+
regexp_detector = TextDetector.factory(:regexp, dictionary)
|
13
|
+
|
14
|
+
p regexp_detector.detect('This document not include NG words.')
|
15
|
+
p regexp_detector.detect('This document include NG words: NG_A')
|
data/examples/simple.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
require 'text_detector'
|
5
|
+
|
6
|
+
dictionary = StringIO.new(<<EOF)
|
7
|
+
NG_A
|
8
|
+
NG_B
|
9
|
+
NG_C
|
10
|
+
EOF
|
11
|
+
|
12
|
+
simple_detector = TextDetector.factory(:simple, dictionary)
|
13
|
+
|
14
|
+
p simple_detector.detect('This document not include NG words.')
|
15
|
+
p simple_detector.detect('This document include NG words: NG_A')
|
data/lib/text_detector/dictionary/file.rb
CHANGED
@@ -20,15 +20,14 @@ module TextDetector
|
|
20
20
|
@dictionary = Set.new
|
21
21
|
@depth = []
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
@depth << text.size
|
30
|
-
end
|
23
|
+
dictionary.each_line do |line|
|
24
|
+
text = TextDetector.normalize(line.chomp)
|
25
|
+
next if text.size == 0
|
26
|
+
|
27
|
+
@dictionary << text
|
28
|
+
@depth << text.size
|
31
29
|
end
|
30
|
+
|
32
31
|
@depth = @depth.sort.uniq
|
33
32
|
end
|
34
33
|
end
|
data/text_detector.gemspec
CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["koshigoe"]
|
10
10
|
spec.email = ["koshigoeb@gmail.com"]
|
11
11
|
|
12
|
-
spec.summary = %q{
|
13
|
-
spec.description = %q{
|
12
|
+
spec.summary = %q{This is an experimental implementation for detecting text from document.}
|
13
|
+
spec.description = %q{This is an experimental implementation for detecting text from document.}
|
14
14
|
spec.homepage = "https://github.com/koshigoe/text_detector"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_detector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- koshigoe
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
description:
|
55
|
+
description: This is an experimental implementation for detecting text from document.
|
56
56
|
email:
|
57
57
|
- koshigoeb@gmail.com
|
58
58
|
executables: []
|
@@ -68,6 +68,8 @@ files:
|
|
68
68
|
- bin/benchmark
|
69
69
|
- bin/console
|
70
70
|
- bin/setup
|
71
|
+
- examples/regexp.rb
|
72
|
+
- examples/simple.rb
|
71
73
|
- lib/text_detector.rb
|
72
74
|
- lib/text_detector/detector.rb
|
73
75
|
- lib/text_detector/detector/base.rb
|
@@ -102,5 +104,5 @@ rubyforge_project:
|
|
102
104
|
rubygems_version: 2.4.5
|
103
105
|
signing_key:
|
104
106
|
specification_version: 4
|
105
|
-
summary:
|
107
|
+
summary: This is an experimental implementation for detecting text from document.
|
106
108
|
test_files: []
|