sensitive_word_filter 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 19204d951c5ad743e31cf097fff02014ff781b2e9c61a94eb25a878df247a0cf
4
+ data.tar.gz: bfb3648c45c29ffe76b1889eb1833a37feccd1237d6c72d750ae9a7d02d6ba98
5
+ SHA512:
6
+ metadata.gz: 0b1cf3d5298f6eebe0455f975d50e992bcc9d3a8d225f4b228eea9ddd0d15bd5437e06095203a9891f570c6325cfed6a752945862771369a249e7d3c4e2f088d
7
+ data.tar.gz: 577fa3e37b1220e652e3813a990f10f655414868f855c890c5fa7a9c8213cc104906d5bac0bd6244e7ff2db1e19e3e6c91154eb7996e51345f00de19c2387ae4
@@ -0,0 +1,45 @@
1
+ # SensitiveWordFilter
2
+
3
+ SensitiveWordFilter is a sensitive-word filter based on a DFA (deterministic finite automaton). It loads sensitive words from one or more word-list files (one word per line) and returns the sensitive words it finds in a piece of text.
4
+
5
+ Before scanning, whitespace and common punctuation are removed from the text.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'sensitive_word_filter'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle install
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install sensitive_word_filter
22
+
23
+ ## Usage
24
+
25
+ SensitiveWordFilter.scan('色情电影') # raises SensitiveWordFilter::Error: at least one word-list file path is required
26
+ SensitiveWordFilter.scan('色情电影', ['./text.text'])
27
+
28
+ ## Development
29
+
30
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
31
+
32
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
33
+
34
+ ## Contributing
35
+
36
+ Bug reports and pull requests are welcome on GitHub at https://github.com/sanm1992/sensitive_word_filter. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/sanm1992/sensitive_word_filter/blob/master/CODE_OF_CONDUCT.md).
37
+
38
+
39
+ ## License
40
+
41
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
42
+
43
+ ## Code of Conduct
44
+
45
+ Everyone interacting in the SensitiveWordFilter project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/sanm1992/sensitive_word_filter/blob/master/CODE_OF_CONDUCT.md).
@@ -0,0 +1,27 @@
1
+ require "sensitive_word_filter/version"
2
+ require "sensitive_word_filter/load"
3
+ require "sensitive_word_filter/scan"
4
+
5
# Finds sensitive words in text, using word lists loaded from files.
module SensitiveWordFilter
  # Raised when a filter is created without any word-list file path.
  class Error < StandardError; end

  # One-shot helper: builds a Filter from +file_paths+ and scans +text+.
  #
  # Every call creates a new Filter, so the word-list files are read again
  # each time. Build a Filter once and reuse it when scanning repeatedly.
  #
  # @param text [String, nil] text to inspect
  # @param file_paths [Array<String>, String] word-list file path(s)
  # @return [Array<String>] the sensitive words found in +text+
  # @raise [SensitiveWordFilter::Error] if +file_paths+ is nil or empty
  def self.scan(text = '', file_paths = [])
    Filter.new(file_paths).scan(text)
  end

  # Reusable filter bound to the words loaded from a set of word-list files.
  class Filter
    # @return [Array<String>, String] the path(s) this filter was built from
    attr_reader :file_paths

    # @param file_paths [Array<String>, String] word-list file path(s)
    # @raise [SensitiveWordFilter::Error] if +file_paths+ is nil or empty
    def initialize(file_paths = [])
      raise Error, 'sensitive words file path is empty' if blank_paths?(file_paths)

      # Stored so that the public file_paths reader returns what was passed in.
      @file_paths = file_paths
      @words = Load.new(file_paths).load
    end

    # Scans +text+ for the words loaded by this filter.
    #
    # @param text [String, nil] text to inspect
    # @return [Array<String>] the sensitive words found in +text+
    def scan(text = '')
      # The word list never changes after construction, so the scanner
      # (and the trie it builds) is created once and reused.
      @scanner ||= Scan.new(@words)
      @scanner.scan(text)
    end

    private

    # True when no usable path was supplied (nil, empty array or empty string).
    def blank_paths?(paths)
      paths.nil? || (paths.respond_to?(:empty?) && paths.empty?)
    end
  end
end
@@ -0,0 +1,36 @@
1
module SensitiveWordFilter
  # Builds a character trie (nested hashes) from a list of words.
  #
  # Each hash maps a single character to a Word node; a node's +value+ is the
  # hash for the next character, and +is_end+ marks the last character of a
  # complete word.
  class DfaTree
    # Trie node: +is_end+ is true when a word ends here, +value+ holds children.
    Word = Struct.new(:is_end, :value)

    attr_accessor :words, :tree

    # @param words [Array<String>, nil] words to insert; nil means no words
    def initialize(words = nil)
      @tree = {}
      @words = words || []
    end

    # Inserts every word into the trie and returns it.
    #
    # Words are stripped of surrounding whitespace first; blank words are
    # ignored.
    #
    # @return [Hash{String => Word}] the root level of the trie
    def dfa_tree
      words.each do |word|
        node = nil
        level = tree

        word.strip.each_char do |char|
          node = (level[char] ||= Word.new(false, {}))
          level = node.value
        end

        # Mark the final node even when it already existed, so a word that is
        # a prefix of an earlier word (e.g. "ab" after "abc") is still
        # recognised as a complete word.
        node.is_end = true if node
      end
      tree
    end
  end
end
@@ -0,0 +1,33 @@
1
module SensitiveWordFilter
  # Reads sensitive words from one or more text files, one word per line.
  class Load
    attr_accessor :words
    attr_reader :file_paths

    # @param file_paths [Array<String>, String] a single path or a list of paths
    def initialize(file_paths)
      @file_paths = file_paths
      @words = []
    end

    # Reads every configured file and appends its lines to +words+.
    #
    # Calling this more than once appends the same words again.
    #
    # @return [Array<String>] all words collected so far
    # @raise [SystemCallError] if a file cannot be opened (e.g. Errno::ENOENT)
    def load
      paths = file_paths.is_a?(Array) ? file_paths : [file_paths]
      paths.each { |file_path| _load(file_path) }
      words
    end

    private

    # Appends each line of +file_path+ to +words+ without its line terminator.
    def _load(file_path)
      # File.foreach closes the file even if reading raises. chomp drops
      # "\r\n" as well as "\n", so CRLF word lists do not keep a trailing "\r".
      File.foreach(file_path) { |line| words.push(line.chomp) }
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require "sensitive_word_filter/dfa_tree"
2
+
3
module SensitiveWordFilter
  # Scans text for sensitive words using the trie built by DfaTree.
  class Scan
    # Root level of the trie: a Hash mapping a character to a node that
    # responds to +is_end+ and +value+.
    attr_accessor :sensitive_map

    # @param words [Array<String>] sensitive words to look for
    def initialize(words = [])
      @sensitive_map = DfaTree.new(words).dfa_tree
    end

    # Returns the sensitive words found in +text+, in order of appearance.
    #
    # Whitespace and punctuation are removed from the text before matching.
    # Matching stops at the shortest word found from a given position and
    # resumes right after it, so matches never overlap.
    #
    # @param text [String, nil] text to inspect
    # @return [Array<String>] matched words; empty when +text+ is nil
    def scan(text = '')
      return [] if text.nil?

      _scan(gsub_text(text))
    end

    private

    # Walks the text iteratively (no recursion, so long inputs cannot
    # exhaust the call stack) and collects every match.
    def _scan(text)
      chars = text.chars
      found = []
      start = 0

      while start < chars.length
        stop = match_end(chars, start)
        if stop
          found << chars[start..stop].join
          start = stop + 1
        else
          # No word starts here, or a partial match died out, including by
          # reaching the end of the text: retry from the next character.
          start += 1
        end
      end

      found
    end

    # Returns the index of the last character of the shortest word that
    # begins at +start+, or nil when no word begins there.
    def match_end(chars, start)
      level = sensitive_map

      (start...chars.length).each do |index|
        node = level[chars[index]]
        return nil if node.nil?
        return index if node.is_end

        level = node.value
      end

      nil
    end

    # Strips surrounding whitespace, then removes punctuation and any
    # remaining whitespace.
    def gsub_text(text)
      text.strip.gsub(/[`~!@#$^&*()=|{}':;',\\\[\]\.<>\/?~!@#¥……&*()——|{}【】';:""'。,、?]|\s/,'')
    end
  end
end
@@ -0,0 +1,3 @@
1
module SensitiveWordFilter
  # Gem release version; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.1".freeze
end
metadata ADDED
@@ -0,0 +1,54 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sensitive_word_filter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - sanm1992
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-10-10 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: sensitive word filter based on DFA
14
+ email:
15
+ - 1320695403@qq.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README.md
21
+ - lib/sensitive_word_filter.rb
22
+ - lib/sensitive_word_filter/dfa_tree.rb
23
+ - lib/sensitive_word_filter/load.rb
24
+ - lib/sensitive_word_filter/scan.rb
25
+ - lib/sensitive_word_filter/version.rb
26
+ homepage: https://github.com/sanm1992/sensitive_word_filter
27
+ licenses:
28
+ - MIT
29
+ metadata:
30
+ allowed_push_host: https://rubygems.org
31
+ homepage_uri: https://github.com/sanm1992/sensitive_word_filter
32
+ source_code_uri: https://github.com/sanm1992/sensitive_word_filter
33
+ changelog_uri: https://github.com/sanm1992/sensitive_word_filter
34
+ post_install_message:
35
+ rdoc_options: []
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 2.3.0
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ requirements: []
49
+ rubyforge_project:
50
+ rubygems_version: 2.7.6.2
51
+ signing_key:
52
+ specification_version: 4
53
+ summary: sensitive word filter
54
+ test_files: []