sensitive_word_filter 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +45 -0
- data/lib/sensitive_word_filter.rb +27 -0
- data/lib/sensitive_word_filter/dfa_tree.rb +36 -0
- data/lib/sensitive_word_filter/load.rb +33 -0
- data/lib/sensitive_word_filter/scan.rb +52 -0
- data/lib/sensitive_word_filter/version.rb +3 -0
- metadata +54 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 19204d951c5ad743e31cf097fff02014ff781b2e9c61a94eb25a878df247a0cf
|
4
|
+
data.tar.gz: bfb3648c45c29ffe76b1889eb1833a37feccd1237d6c72d750ae9a7d02d6ba98
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0b1cf3d5298f6eebe0455f975d50e992bcc9d3a8d225f4b228eea9ddd0d15bd5437e06095203a9891f570c6325cfed6a752945862771369a249e7d3c4e2f088d
|
7
|
+
data.tar.gz: 577fa3e37b1220e652e3813a990f10f655414868f855c890c5fa7a9c8213cc104906d5bac0bd6244e7ff2db1e19e3e6c91154eb7996e51345f00de19c2387ae4
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# SensitiveWordFilter
|
2
|
+
|
3
|
+
SensitiveWordFilter is a Ruby gem that detects sensitive words in a piece of text. It loads word lists from one or more files, builds a DFA (a character tree) from them, and returns the sensitive words found in the scanned text.
|
4
|
+
|
5
|
+
Word lists are plain text files containing one sensitive word per line.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'sensitive_word_filter'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle install
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install sensitive_word_filter
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
SensitiveWordFilter.scan('色情电影')
|
26
|
+
SensitiveWordFilter.scan('色情电影', ['./text.text'])
|
27
|
+
|
28
|
+
## Development
|
29
|
+
|
30
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
31
|
+
|
32
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
33
|
+
|
34
|
+
## Contributing
|
35
|
+
|
36
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/sanm1992/sensitive_word_filter. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/sanm1992/sensitive_word_filter/blob/master/CODE_OF_CONDUCT.md).
|
37
|
+
|
38
|
+
|
39
|
+
## License
|
40
|
+
|
41
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
42
|
+
|
43
|
+
## Code of Conduct
|
44
|
+
|
45
|
+
Everyone interacting in the SensitiveWordFilter project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/sanm1992/sensitive_word_filter/blob/master/CODE_OF_CONDUCT.md).
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require "sensitive_word_filter/version"
|
2
|
+
require "sensitive_word_filter/load"
|
3
|
+
require "sensitive_word_filter/scan"
|
4
|
+
|
5
|
+
# Namespace of the gem plus its public entry points.
module SensitiveWordFilter
  # Raised when a filter is requested without any word-list file path.
  class Error < StandardError; end

  # One-shot helper: builds a Filter from +file_paths+ and scans +text+.
  #
  # @param text [String, nil] text to inspect
  # @param file_paths [Array<String>, String] path(s) of the word-list files
  # @return [Array<String>] the sensitive words found in +text+
  # @raise [Error] when +file_paths+ is nil or empty
  def self.scan(text='', file_paths=[])
    Filter.new(file_paths).scan(text)
  end

  # Reusable filter bound to a fixed set of word-list files.
  class Filter
    # @return [Array<String>, String] the path(s) this filter was built from
    attr_reader :file_paths

    # Loads every word from +file_paths+.
    #
    # @param file_paths [Array<String>, String] path(s) of the word-list files
    # @raise [Error] when +file_paths+ is nil or empty
    def initialize(file_paths=[])
      # nil is treated like "no paths" so callers get the gem's own error
      # instead of a NoMethodError from nil.empty?.
      raise Error, 'sensitive words file path is empty' if file_paths.nil? || file_paths.empty?

      @file_paths = file_paths
      @words = Load.new(file_paths).load
    end

    # Scans +text+ against the loaded words.
    #
    # @param text [String, nil] text to inspect
    # @return [Array<String>] the sensitive words found in +text+
    def scan(text='')
      scanner.scan(text)
    end

    private

    # The scanner is built once and reused: constructing it walks the whole
    # word list, which would otherwise be repeated on every #scan call.
    def scanner
      @scanner ||= Scan.new(@words)
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module SensitiveWordFilter
  # Builds the character tree (nested hashes) used to match sensitive words.
  #
  # Each hash maps a single character to a Word node. A node's +value+ is the
  # hash of the characters that may follow it, and +is_end+ is true when the
  # path from the root to that node spells a complete word.
  class DfaTree
    Word = Struct.new(:is_end, :value)

    attr_accessor :words, :tree

    # @param words [Array<String>, nil] words to insert; nil means no words
    def initialize(words=nil)
      @tree = {}
      @words = words || []
    end

    # Inserts every word into the tree and returns it.
    #
    # Surrounding whitespace of each word is ignored, so blank entries add
    # nothing to the tree.
    #
    # @return [Hash{String => Word}] the root level of the tree
    def dfa_tree
      words.each do |word|
        level = tree
        chars = word.strip.chars
        last_index = chars.length - 1

        chars.each_with_index do |char, index|
          node = (level[char] ||= Word.new(false, {}))
          # Mark the end on existing nodes too: otherwise a word that is a
          # prefix of an earlier, longer word would never be registered.
          node.is_end = true if index == last_index
          level = node.value
        end
      end
      tree
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module SensitiveWordFilter
  # Reads sensitive words from one or more plain-text files, one word per line.
  class Load
    attr_accessor :words
    attr_reader :file_paths

    # @param file_paths [Array<String>, String] a list of paths or a single path
    def initialize(file_paths)
      @file_paths = file_paths
      @words = []
    end

    # Reads every configured file and appends its lines to +words+.
    #
    # @return [Array<String>] all words collected so far
    def load
      paths = file_paths.is_a?(Array) ? file_paths : [file_paths]
      paths.each { |file_path| _load(file_path) }
      words
    end

    private

    # Appends each line of +file_path+ to +words+ without its line terminator.
    def _load(file_path)
      # Block form guarantees the handle is closed even if reading raises.
      File.open(file_path, "r") do |file|
        # chomp also drops the "\r" of CRLF files, which gsub("\n", '') left behind.
        file.each_line { |line| words.push(line.chomp) }
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require "sensitive_word_filter/dfa_tree"
|
2
|
+
|
3
|
+
module SensitiveWordFilter
  # Finds sensitive words in a text by walking the character tree built by
  # DfaTree.
  class Scan
    attr_accessor :sensitive_map

    # @param words [Array<String>] the sensitive words to look for
    def initialize(words=[])
      @sensitive_map = DfaTree.new(words).dfa_tree
    end

    # Returns the sensitive words found in +text+, in order of appearance.
    #
    # Punctuation and whitespace are removed before matching (see #gsub_text),
    # so words split by such characters are still detected.
    #
    # @param text [String, nil] text to inspect
    # @return [Array<String>] matched words; empty when +text+ is nil
    def scan(text = '')
      return [] if text.nil?

      _scan(gsub_text(text))
    end

    private

    # Iterative scan over the characters of +text+.
    #
    # At each start position the tree is followed until the first node marked
    # as a word end (shortest match). On a match, scanning resumes right after
    # it; otherwise it resumes one character after the start position. Using a
    # loop instead of per-character recursion keeps long texts from exhausting
    # the stack, and also retries the remaining positions when a partial match
    # runs into the end of the text.
    def _scan(text = '', sensitive_words = [])
      chars = text.chars
      start = 0

      while start < chars.length
        length = match_length_at(chars, start)
        if length
          sensitive_words.push(chars[start, length].join)
          start += length
        else
          start += 1
        end
      end

      sensitive_words
    end

    # Length of the shortest sensitive word beginning at +start+, or nil.
    def match_length_at(chars, start)
      level = sensitive_map

      (start...chars.length).each do |index|
        node = level[chars[index]]
        return nil if node.nil?
        return index - start + 1 if node.is_end

        level = node.value
      end

      nil
    end

    # Strips the text and removes the listed punctuation and all whitespace.
    def gsub_text(text)
      text.strip.gsub(/[`~!@#$^&*()=|{}':;',\\\[\]\.<>\/?~!@#¥……&*()——|{}【】';:""'。,、?]|\s/,'')
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sensitive_word_filter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- sanm1992
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-10-10 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: sensitive word filter based on DFA
|
14
|
+
email:
|
15
|
+
- 1320695403@qq.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- README.md
|
21
|
+
- lib/sensitive_word_filter.rb
|
22
|
+
- lib/sensitive_word_filter/dfa_tree.rb
|
23
|
+
- lib/sensitive_word_filter/load.rb
|
24
|
+
- lib/sensitive_word_filter/scan.rb
|
25
|
+
- lib/sensitive_word_filter/version.rb
|
26
|
+
homepage: https://github.com/sanm1992/sensitive_word_filter
|
27
|
+
licenses:
|
28
|
+
- MIT
|
29
|
+
metadata:
|
30
|
+
allowed_push_host: https://rubygems.org
|
31
|
+
homepage_uri: https://github.com/sanm1992/sensitive_word_filter
|
32
|
+
source_code_uri: https://github.com/sanm1992/sensitive_word_filter
|
33
|
+
changelog_uri: https://github.com/sanm1992/sensitive_word_filter
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options: []
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 2.3.0
|
43
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
requirements: []
|
49
|
+
rubyforge_project:
|
50
|
+
rubygems_version: 2.7.6.2
|
51
|
+
signing_key:
|
52
|
+
specification_version: 4
|
53
|
+
summary: sensitive word filter
|
54
|
+
test_files: []
|