sensitive_word_filter 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 19204d951c5ad743e31cf097fff02014ff781b2e9c61a94eb25a878df247a0cf
4
+ data.tar.gz: bfb3648c45c29ffe76b1889eb1833a37feccd1237d6c72d750ae9a7d02d6ba98
5
+ SHA512:
6
+ metadata.gz: 0b1cf3d5298f6eebe0455f975d50e992bcc9d3a8d225f4b228eea9ddd0d15bd5437e06095203a9891f570c6325cfed6a752945862771369a249e7d3c4e2f088d
7
+ data.tar.gz: 577fa3e37b1220e652e3813a990f10f655414868f855c890c5fa7a9c8213cc104906d5bac0bd6244e7ff2db1e19e3e6c91154eb7996e51345f00de19c2387ae4
@@ -0,0 +1,45 @@
1
+ # SensitiveWordFilter
2
+
3
+ SensitiveWordFilter scans text for sensitive words. The words are read from one or more plain-text word-list files (one word per line) and compiled into a DFA (a character trie) that is walked while scanning.
4
+
5
+ Punctuation and whitespace are stripped from the text before it is scanned, and `scan` returns an array of the sensitive words that were found.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'sensitive_word_filter'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle install
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install sensitive_word_filter
22
+
23
+ ## Usage
24
+
25
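+ SensitiveWordFilter.scan('色情电影') # raises SensitiveWordFilter::Error because no word-list file path was given
26
+ SensitiveWordFilter.scan('色情电影', ['./text.text']) # returns an array of the sensitive words found in the text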
27
+
28
+ ## Development
29
+
30
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
31
+
32
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
33
+
34
+ ## Contributing
35
+
36
+ Bug reports and pull requests are welcome on GitHub at https://github.com/sanm1992/sensitive_word_filter. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/sanm1992/sensitive_word_filter/blob/master/CODE_OF_CONDUCT.md).
37
+
38
+
39
+ ## License
40
+
41
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
42
+
43
+ ## Code of Conduct
44
+
45
+ Everyone interacting in the SensitiveWordFilter project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/sanm1992/sensitive_word_filter/blob/master/CODE_OF_CONDUCT.md).
@@ -0,0 +1,27 @@
1
+ require "sensitive_word_filter/version"
2
+ require "sensitive_word_filter/load"
3
+ require "sensitive_word_filter/scan"
4
+
5
# Namespace of the gem: a one-shot +scan+ helper plus a reusable Filter.
module SensitiveWordFilter
  # Raised for gem-specific failures, e.g. when no word-list path is given.
  class Error < StandardError; end

  # Scans +text+ once against the word lists found at +file_paths+.
  #
  # @param text [String, nil] text to inspect
  # @param file_paths [Array<String>, String] word-list file path(s)
  # @return [Array<String>] whatever Scan#scan returns for +text+
  # @raise [Error] when +file_paths+ is nil or empty
  def self.scan(text = '', file_paths = [])
    Filter.new(file_paths).scan(text)
  end

  # Loads the word lists once and scans any number of texts against them.
  class Filter
    attr_reader :file_paths

    # @param file_paths [Array<String>, String] word-list file path(s)
    # @raise [Error] when +file_paths+ is nil or empty
    def initialize(file_paths = [])
      # nil is rejected with the gem's own error instead of a NoMethodError.
      raise Error, 'sensitive words file path is empty' if file_paths.nil? || file_paths.empty?

      # Stored so the public +file_paths+ reader actually returns the paths.
      @file_paths = file_paths
      @words = Load.new(file_paths).load
    end

    # @param text [String, nil] text to inspect
    # @return [Array<String>] result of Scan#scan for +text+
    def scan(text = '')
      # The scanner (and the DFA it builds) is created once and reused,
      # because the word list cannot change after construction.
      @scanner ||= Scan.new(@words)
      @scanner.scan(text)
    end
  end
end
@@ -0,0 +1,36 @@
1
module SensitiveWordFilter
  # Builds a character trie (the "DFA") from a list of words.
  #
  # Each level is a Hash keyed by a single character. Every entry is a Word
  # struct whose +is_end+ flag says a word terminates at that character and
  # whose +value+ is the Hash for the next level.
  class DfaTree
    Word = Struct.new(:is_end, :value)

    attr_accessor :words, :tree

    # @param words [Array<String>, nil] words to insert; nil means none
    def initialize(words = nil)
      @tree = {}
      @words = words || []
    end

    # Inserts every word (surrounding whitespace stripped) into the tree.
    #
    # @return [Hash] the root level of the tree
    def dfa_tree
      words.each do |word|
        level = tree
        chars = word.strip.chars
        last_index = chars.length - 1

        chars.each_with_index do |char, index|
          node = (level[char] ||= Word.new(false, {}))
          # Mark the terminal flag on existing nodes too; otherwise a word
          # that is a prefix of an earlier word ("ab" after "abc") is lost.
          node.is_end = true if index == last_index
          level = node.value
        end
      end
      tree
    end
  end
end
@@ -0,0 +1,33 @@
1
module SensitiveWordFilter
  # Reads words from plain-text files, treating every line as one word.
  class Load
    attr_accessor :words
    attr_reader :file_paths

    # @param file_paths [Array<String>, String] one path or a list of paths
    def initialize(file_paths)
      @file_paths = file_paths
      @words = []
    end

    # Appends the lines of every configured file to +words+.
    #
    # @return [Array<String>] all words collected so far
    def load
      paths = file_paths.is_a?(Array) ? file_paths : [file_paths]
      paths.each { |file_path| _load(file_path) }
      words
    end

    private

    # Reads one file line by line. File.foreach closes the file even when
    # iteration raises, and chomp also drops the "\r" of CRLF line endings.
    def _load(file_path)
      File.foreach(file_path) { |line| words.push(line.chomp) }
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require "sensitive_word_filter/dfa_tree"
2
+
3
module SensitiveWordFilter
  # Finds sensitive words in a text by walking the trie built by DfaTree.
  class Scan
    attr_accessor :sensitive_map

    # @param words [Array<String>] words to compile into the trie
    def initialize(words = [])
      @sensitive_map = DfaTree.new(words).dfa_tree
    end

    # Strips punctuation and whitespace from +text+, then collects every
    # sensitive word found, left to right.
    #
    # @param text [String, nil] text to inspect
    # @return [Array<String>] matched words in order of appearance; [] for nil
    def scan(text = '')
      return [] if text.nil?

      _scan(gsub_text(text))
    end

    private

    # Iterative scan: at each start position take the shortest word that
    # begins there and resume right after it; otherwise advance by one
    # character. Being a loop, it cannot overflow the stack on long texts,
    # and a partial match cut off by the end of the text no longer hides
    # words that start inside it.
    def _scan(text = '', sensitive_words = [])
      chars = text.chars
      start = 0

      while start < chars.length
        length = match_length(chars, start)
        if length
          sensitive_words.push(chars[start, length].join)
          start += length
        else
          start += 1
        end
      end

      sensitive_words
    end

    # Length of the shortest word that starts at +start+, or nil when no
    # word starts there.
    def match_length(chars, start)
      level = sensitive_map

      (start...chars.length).each do |index|
        node = level[chars[index]]
        return nil if node.nil?
        return index - start + 1 if node.is_end

        level = node.value
      end

      nil
    end

    # Removes surrounding whitespace, the listed punctuation characters and
    # any remaining whitespace so they cannot split a word.
    def gsub_text(text)
      text.strip.gsub(/[`~!@#$^&*()=|{}':;',\\\[\]\.<>\/?~!@#¥……&*()——|{}【】';:""'。,、?]|\s/,'')
    end
  end
end
@@ -0,0 +1,3 @@
1
module SensitiveWordFilter
  # Version string of this gem release.
  VERSION = '0.1.1'
end
metadata ADDED
@@ -0,0 +1,54 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sensitive_word_filter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - sanm1992
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-10-10 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: sensitive word filter base on DFA
14
+ email:
15
+ - 1320695403@qq.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README.md
21
+ - lib/sensitive_word_filter.rb
22
+ - lib/sensitive_word_filter/dfa_tree.rb
23
+ - lib/sensitive_word_filter/load.rb
24
+ - lib/sensitive_word_filter/scan.rb
25
+ - lib/sensitive_word_filter/version.rb
26
+ homepage: https://github.com/sanm1992/sensitive_word_filter
27
+ licenses:
28
+ - MIT
29
+ metadata:
30
+ allowed_push_host: https://rubygems.org
31
+ homepage_uri: https://github.com/sanm1992/sensitive_word_filter
32
+ source_code_uri: https://github.com/sanm1992/sensitive_word_filter
33
+ changelog_uri: https://github.com/sanm1992/sensitive_word_filter
34
+ post_install_message:
35
+ rdoc_options: []
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 2.3.0
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ requirements: []
49
+ rubyforge_project:
50
+ rubygems_version: 2.7.6.2
51
+ signing_key:
52
+ specification_version: 4
53
+ summary: sensitive word filter
54
+ test_files: []