text_detector 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a7d68797d530b617238813bcadb52cf8649d13f6
4
- data.tar.gz: f615523b8676171aebb035c2d4432a3c596f2126
3
+ metadata.gz: 315dfa048ec3b6dd195976900bb573407cbfdfc9
4
+ data.tar.gz: 2441e251ec922dc7900907fab919a068f1dbd614
5
5
  SHA512:
6
- metadata.gz: 1b0263082fe4138421f104844389afdf78aef50b864d6dbd7c2021176296da7f8ec86e41241bc4a80ff4d461de0fb38c8dcc5bfc3ff6ca8dc6eaae885c8bd274
7
- data.tar.gz: 0c542dde4eff06e4a5a80be840acbde7e9ac3b061e03e84aea633a4b28fb032fa1c1e056d1b84d45f02ddaab79640408b820ddbfc532571de306f3f3923785cc
6
+ metadata.gz: e31c2fe0028629483e7dc6079c16314217ef15c471289b9ffedc8030c3ea807db09fc55905080eec18f196de2009f1b9615c0e7770e9e946bd802d4ea7b38d18
7
+ data.tar.gz: f1134bc00b6cd5261d566b545c0ea9b2c108f6c36c1b76c860f2ecab935f5e1d4aaf384db662d9deb65149b09781a50709e60bbdaa231bf608a0d37ace52a547
data/README.md CHANGED
@@ -3,9 +3,7 @@
3
3
 
4
4
  # TextDetector
5
5
 
6
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/text_detector`. To experiment with that code, run `bin/console` for an interactive prompt.
7
-
8
- TODO: Delete this and the text above, and describe your gem
6
+ This is an experimental implementation for detecting text in a document.
9
7
 
10
8
  ## Installation
11
9
 
@@ -25,7 +23,23 @@ Or install it yourself as:
25
23
 
26
24
  ## Usage
27
25
 
28
- TODO: Write usage instructions here
26
+ See also `bin/benchmark` and `examples/`.
27
+
28
+ ### Regexp
29
+
30
+ ```ruby
31
+ require 'text_detector'
32
+ regexp_detector = TextDetector.factory(:regexp, open('dictionary.txt'))
33
+ regexp_detector.detect('The detector detects NG word from this document. The dictionary has many NG words.')
34
+ ```
35
+
36
+ ### Boyer-Moore-like String Search Algorithm
37
+
38
+ ```ruby
39
+ require 'text_detector'
40
+ simple_detector = TextDetector.factory(:simple, open('dictionary.txt'))
41
+ simple_detector.detect('The detector detects NG word from this document. The dictionary has many NG words.')
42
+ ```
29
43
 
30
44
  ## Development
31
45
 
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+
3
+ require 'stringio'
4
+ require 'text_detector'
5
+
6
+ dictionary = StringIO.new(<<EOF)
7
+ NG_A
8
+ NG_B
9
+ NG_C
10
+ EOF
11
+
12
+ regexp_detector = TextDetector.factory(:regexp, dictionary)
13
+
14
+ p regexp_detector.detect('This document not include NG words.')
15
+ p regexp_detector.detect('This document include NG words: NG_A')
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+
3
+ require 'stringio'
4
+ require 'text_detector'
5
+
6
+ dictionary = StringIO.new(<<EOF)
7
+ NG_A
8
+ NG_B
9
+ NG_C
10
+ EOF
11
+
12
+ simple_detector = TextDetector.factory(:simple, dictionary)
13
+
14
+ p simple_detector.detect('This document not include NG words.')
15
+ p simple_detector.detect('This document include NG words: NG_A')
@@ -20,15 +20,14 @@ module TextDetector
20
20
  @dictionary = Set.new
21
21
  @depth = []
22
22
 
23
- open(dictionary) do |input|
24
- input.each_line do |line|
25
- text = TextDetector.normalize(line.chomp)
26
- next if text.size == 0
27
-
28
- @dictionary << text
29
- @depth << text.size
30
- end
23
+ dictionary.each_line do |line|
24
+ text = TextDetector.normalize(line.chomp)
25
+ next if text.size == 0
26
+
27
+ @dictionary << text
28
+ @depth << text.size
31
29
  end
30
+
32
31
  @depth = @depth.sort.uniq
33
32
  end
34
33
  end
@@ -1,10 +1,11 @@
1
+ require 'stringio'
1
2
  require_relative 'dictionary/file'
2
3
 
3
4
  module TextDetector
4
5
  module Dictionary
5
6
  def self.factory(dictionary)
6
7
  case dictionary
7
- when ::File
8
+ when ::IO, ::StringIO
8
9
  TextDetector::Dictionary::File.new(dictionary)
9
10
  end
10
11
  end
@@ -1,3 +1,3 @@
1
1
  module TextDetector
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["koshigoe"]
10
10
  spec.email = ["koshigoeb@gmail.com"]
11
11
 
12
- spec.summary = %q{Text detector.}
13
- spec.description = %q{Text detector}
12
+ spec.summary = %q{This is an experimental implementation for detecting text from document.}
13
+ spec.description = %q{This is an experimental implementation for detecting text from document.}
14
14
  spec.homepage = "https://github.com/koshigoe/text_detector"
15
15
  spec.license = "MIT"
16
16
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - koshigoe
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-03-26 00:00:00.000000000 Z
11
+ date: 2015-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,7 +52,7 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
- description: Text detector
55
+ description: This is an experimental implementation for detecting text from document.
56
56
  email:
57
57
  - koshigoeb@gmail.com
58
58
  executables: []
@@ -68,6 +68,8 @@ files:
68
68
  - bin/benchmark
69
69
  - bin/console
70
70
  - bin/setup
71
+ - examples/regexp.rb
72
+ - examples/simple.rb
71
73
  - lib/text_detector.rb
72
74
  - lib/text_detector/detector.rb
73
75
  - lib/text_detector/detector/base.rb
@@ -102,5 +104,5 @@ rubyforge_project:
102
104
  rubygems_version: 2.4.5
103
105
  signing_key:
104
106
  specification_version: 4
105
- summary: Text detector.
107
+ summary: This is an experimental implementation for detecting text from document.
106
108
  test_files: []