text_detector 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a7d68797d530b617238813bcadb52cf8649d13f6
4
- data.tar.gz: f615523b8676171aebb035c2d4432a3c596f2126
3
+ metadata.gz: 315dfa048ec3b6dd195976900bb573407cbfdfc9
4
+ data.tar.gz: 2441e251ec922dc7900907fab919a068f1dbd614
5
5
  SHA512:
6
- metadata.gz: 1b0263082fe4138421f104844389afdf78aef50b864d6dbd7c2021176296da7f8ec86e41241bc4a80ff4d461de0fb38c8dcc5bfc3ff6ca8dc6eaae885c8bd274
7
- data.tar.gz: 0c542dde4eff06e4a5a80be840acbde7e9ac3b061e03e84aea633a4b28fb032fa1c1e056d1b84d45f02ddaab79640408b820ddbfc532571de306f3f3923785cc
6
+ metadata.gz: e31c2fe0028629483e7dc6079c16314217ef15c471289b9ffedc8030c3ea807db09fc55905080eec18f196de2009f1b9615c0e7770e9e946bd802d4ea7b38d18
7
+ data.tar.gz: f1134bc00b6cd5261d566b545c0ea9b2c108f6c36c1b76c860f2ecab935f5e1d4aaf384db662d9deb65149b09781a50709e60bbdaa231bf608a0d37ace52a547
data/README.md CHANGED
@@ -3,9 +3,7 @@
3
3
 
4
4
  # TextDetector
5
5
 
6
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/text_detector`. To experiment with that code, run `bin/console` for an interactive prompt.
7
-
8
- TODO: Delete this and the text above, and describe your gem
6
+ This is an experimental implementation for detecting text in documents.
9
7
 
10
8
  ## Installation
11
9
 
@@ -25,7 +23,23 @@ Or install it yourself as:
25
23
 
26
24
  ## Usage
27
25
 
28
- TODO: Write usage instructions here
26
+ See also `bin/benchmark` and `examples/`.
27
+
28
+ ### Regexp
29
+
30
+ ```ruby
31
+ require 'text_detector'
32
+ regexp_detector = TextDetector.factory(:regexp, open('dictionary.txt'))
33
+ regexp_detector.detect('The detector detects NG word from this document. The dictionary has many NG words.')
34
+ ```
35
+
36
+ ### Boyer-Moore-like String Search Algorithm
37
+
38
+ ```ruby
39
+ require 'text_detector'
40
+ simple_detector = TextDetector.factory(:simple, open('dictionary.txt'))
41
+ simple_detector.detect('The detector detects NG word from this document. The dictionary has many NG words.')
42
+ ```
29
43
 
30
44
  ## Development
31
45
 
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+
3
+ require 'stringio'
4
+ require 'text_detector'
5
+
6
+ dictionary = StringIO.new(<<EOF)
7
+ NG_A
8
+ NG_B
9
+ NG_C
10
+ EOF
11
+
12
+ regexp_detector = TextDetector.factory(:regexp, dictionary)
13
+
14
+ p regexp_detector.detect('This document not include NG words.')
15
+ p regexp_detector.detect('This document include NG words: NG_A')
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+
3
+ require 'stringio'
4
+ require 'text_detector'
5
+
6
+ dictionary = StringIO.new(<<EOF)
7
+ NG_A
8
+ NG_B
9
+ NG_C
10
+ EOF
11
+
12
+ simple_detector = TextDetector.factory(:simple, dictionary)
13
+
14
+ p simple_detector.detect('This document not include NG words.')
15
+ p simple_detector.detect('This document include NG words: NG_A')
@@ -20,15 +20,14 @@ module TextDetector
20
20
  @dictionary = Set.new
21
21
  @depth = []
22
22
 
23
- open(dictionary) do |input|
24
- input.each_line do |line|
25
- text = TextDetector.normalize(line.chomp)
26
- next if text.size == 0
27
-
28
- @dictionary << text
29
- @depth << text.size
30
- end
23
+ dictionary.each_line do |line|
24
+ text = TextDetector.normalize(line.chomp)
25
+ next if text.size == 0
26
+
27
+ @dictionary << text
28
+ @depth << text.size
31
29
  end
30
+
32
31
  @depth = @depth.sort.uniq
33
32
  end
34
33
  end
@@ -1,10 +1,11 @@
1
+ require 'stringio'
1
2
  require_relative 'dictionary/file'
2
3
 
3
4
  module TextDetector
4
5
  module Dictionary
5
6
  def self.factory(dictionary)
6
7
  case dictionary
7
- when ::File
8
+ when ::IO, ::StringIO
8
9
  TextDetector::Dictionary::File.new(dictionary)
9
10
  end
10
11
  end
@@ -1,3 +1,3 @@
1
1
  module TextDetector
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["koshigoe"]
10
10
  spec.email = ["koshigoeb@gmail.com"]
11
11
 
12
- spec.summary = %q{Text detector.}
13
- spec.description = %q{Text detector}
12
+ spec.summary = %q{This is an experimental implementation for detecting text from document.}
13
+ spec.description = %q{This is an experimental implementation for detecting text from document.}
14
14
  spec.homepage = "https://github.com/koshigoe/text_detector"
15
15
  spec.license = "MIT"
16
16
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - koshigoe
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-03-26 00:00:00.000000000 Z
11
+ date: 2015-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,7 +52,7 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
- description: Text detector
55
+ description: This is an experimental implementation for detecting text from document.
56
56
  email:
57
57
  - koshigoeb@gmail.com
58
58
  executables: []
@@ -68,6 +68,8 @@ files:
68
68
  - bin/benchmark
69
69
  - bin/console
70
70
  - bin/setup
71
+ - examples/regexp.rb
72
+ - examples/simple.rb
71
73
  - lib/text_detector.rb
72
74
  - lib/text_detector/detector.rb
73
75
  - lib/text_detector/detector/base.rb
@@ -102,5 +104,5 @@ rubyforge_project:
102
104
  rubygems_version: 2.4.5
103
105
  signing_key:
104
106
  specification_version: 4
105
- summary: Text detector.
107
+ summary: This is an experimental implementation for detecting text from document.
106
108
  test_files: []