cf-swearjar 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,32 @@
1
+ require 'yaml'
2
+ require 'fuzzy_hash'
3
+ require 'bloomfilter'
4
+
5
class Swearjar
  # Word-level profanity tester backed by a FuzzyHash lexicon loaded from a
  # YAML config file.
  class Tester

    # Tokenizer used by #scan: runs of ASCII letters and hyphens delimited by
    # word boundaries (the same pattern Swearjar#scan uses).
    WORD_PATTERN = /\b[a-zA-Z-]+\b/

    # Builds the lexicon.
    #
    # config_file - path to a YAML file. Two optional top-level keys are read:
    #               'regex'  => { pattern source => type }
    #               'simple' => { literal word   => type }
    #
    # Raises whatever YAML.load_file raises when the file is missing or
    # malformed.
    def initialize(config_file)
      # An empty YAML document loads as a falsy value; treat it as "no entries".
      data = YAML.load_file(config_file) || {}

      @tester = FuzzyHash.new

      (data['regex'] || {}).each do |pattern, type|
        @tester[Regexp.new(pattern)] = type
      end

      (data['simple'] || {}).each do |test, type|
        @tester[test] = type
      end
    end

    # Yields every word found in +string+ to the given block.
    #
    # string - anything responding to #to_s (nil is treated as an empty string).
    def scan(string, &block)
      string.to_s.scan(WORD_PATTERN, &block)
    end

    # Returns true when at least one word of +string+ has an entry in the
    # lexicon, false otherwise.
    #
    # NOTE(review): relies on FuzzyHash#[] resolving both literal and Regexp
    # keys -- confirm against the fuzzyhash gem version in use.
    def profane?(string)
      scan(string) { |word| return true if @tester[word.downcase] }
      false
    end

  end
end
@@ -0,0 +1,3 @@
1
class Swearjar
  # Release version of the gem. Frozen so the shared constant string cannot
  # be mutated in place by callers.
  VERSION = '1.0.1'.freeze
end
data/lib/swearjar.rb ADDED
@@ -0,0 +1,63 @@
1
+ require 'yaml'
2
+ require 'fuzzy_hash'
3
+
4
# Profanity detector: looks words up in a literal lexicon (@hash) and runs the
# whole input through a FuzzyHash of regular expressions (@tester).
class Swearjar

  # Tokenizer for the literal lexicon: runs of ASCII letters and hyphens.
  WORD_PATTERN = /\b[a-zA-Z-]+\b/

  # Returns an instance loaded with the default ('en') lexicon.
  def self.default
    from_language
  end

  # Returns an instance loaded from lib/config/<language>.yml, resolved
  # relative to this file.
  def self.from_language(language = 'en')
    new(File.join(File.dirname(__FILE__), 'config', "#{language}.yml"))
  end

  # tester - FuzzyHash of Regexp => categories.
  # hash   - Hash of literal word => categories.
  #
  # NOTE(review): the :hash reader shadows Object#hash, so instances should
  # not be used as Hash keys. Kept because it is part of the public interface.
  attr_reader :tester, :hash

  # file - optional path to a YAML lexicon to load immediately.
  def initialize(file = nil)
    @tester = FuzzyHash.new
    @hash = {}
    load_file(file) if file
  end

  # Merges the entries of a YAML lexicon into this instance.
  #
  # file - path to a YAML file with optional 'regex' and 'simple' mappings.
  #
  # Raises whatever YAML.load_file raises for a missing or malformed file.
  def load_file(file)
    # An empty YAML document loads as a falsy value; treat it as "no entries".
    data = YAML.load_file(file) || {}

    (data['regex'] || {}).each do |pattern, type|
      @tester[Regexp.new(pattern)] = type
    end

    (data['simple'] || {}).each do |test, type|
      @hash[test] = type
    end
  end

  # Yields (matched_text, categories) pairs for +string+: first one pair per
  # word (categories is nil for words not in the lexicon), then at most one
  # pair for the regex lexicon.
  #
  # Returns an Enumerator when called without a block.
  def scan(string, &block)
    return enum_for(:scan, string) unless block

    string = string.to_s
    each_word(string, &block)
    each_pattern_match(string, &block)
  end

  # Returns true when anything in +string+ is flagged, false otherwise.
  def profane?(string)
    scan(string.to_s) { |_word, test| return true unless test.nil? }
    false
  end

  # Returns a Hash of category => number of flagged matches in that category.
  def scorecard(string)
    tally = {}
    scan(string.to_s) do |_word, test|
      next unless test
      # Array() tolerates a lexicon entry whose value is a single category.
      Array(test).each { |type| tally[type] = tally.fetch(type, 0) + 1 }
    end
    tally
  end

  # Returns a copy of +string+ with every flagged match replaced. Without a
  # block each non-whitespace character becomes '*'; with a block the match
  # is yielded and replaced by the block's result.
  def censor(string)
    text = string.to_s
    censored_string = text.dup

    # Literal-lexicon hits are whole words: replace them only at word
    # boundaries so "ass" is not also masked inside "class".
    each_word(text) do |word, test|
      next unless test
      replacement = block_given? ? yield(word) : word.gsub(/\S/, '*')
      # Block form of gsub! keeps backslashes in the replacement literal.
      censored_string.gsub!(/\b#{Regexp.escape(word)}\b/) { replacement }
    end

    # Regex-lexicon hits may span several words or part of one, so they are
    # replaced as plain substrings.
    each_pattern_match(text) do |match, test|
      next unless test
      replacement = block_given? ? yield(match) : match.gsub(/\S/, '*')
      censored_string.gsub!(match) { replacement }
    end

    censored_string
  end

  private

  # Yields each word of +string+ together with its lexicon entry (or nil).
  def each_word(string)
    string.scan(WORD_PATTERN) { |word| yield word, lookup(word) }
  end

  # Looks a word up case-insensitively, retrying with a trailing "s"/"es"
  # stripped so simple plurals are found.
  def lookup(word)
    key = word.downcase
    hash[key] || hash[key.gsub(/e?s$/, '')]
  end

  # Yields (matched_text, categories) for the regex lexicon's match, if any.
  def each_pattern_match(string)
    match = tester.match_with_result(string)
    yield match.last, match.first if match
  end

end
@@ -0,0 +1,2 @@
1
+ simple:
2
+ "python": ["sexual"]
data/spec/spec.opts ADDED
@@ -0,0 +1,7 @@
1
+ --colour
2
+ --format
3
+ specdoc
4
+ --loadby
5
+ mtime
6
+ --reverse
7
+ --backtrace
@@ -0,0 +1 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'swearjar'))
@@ -0,0 +1,48 @@
1
+ require 'spec_helper'
2
+
3
describe Swearjar do

  # Builds a fresh instance loaded with the default lexicon for each call.
  def jar
    Swearjar.default
  end

  it "should detect dirty words" do
    jar.profane?('fuck you jim henson').should be_true
  end

  it "should detect dirty words regardless of case" do
    jar.profane?('FuCk you jim henson').should be_true
  end

  it "should not detect non-dirty words" do
    jar.profane?('i love you jim henson').should be_false
  end

  it "should give us a scorecard" do
    jar.scorecard('fuck you jim henson').should == {'sexual'=>1}
  end

  it "should detect multiword" do
    jar.scorecard('jim henson has a hard on').should == {'sexual'=>1}
  end

  it "should detect multiword plurals" do
    jar.scorecard('jim henson has a hard ons').should == {'sexual'=>1}
  end

  it "should detect simple dirty plurals" do
    jar.profane?('jim henson had two dicks').should be_true
    jar.profane?('jim henson has two asses').should be_true
  end

  it "should censor a string" do
    original = 'jim henson has a massive hard on he is gonna use to fuck everybody'
    expected = 'jim henson has a massive **** ** he is gonna use to **** everybody'
    jar.censor(original).should == expected
  end

  it "should not do much when given a non-string" do
    jar.profane?(nil).should be_false
  end

  it "should allow you to load a new yaml file" do
    # Start from an empty instance so only the custom lexicon applies.
    custom_lexicon = File.expand_path('../data/swear.yml', __FILE__)
    sj = Swearjar.new
    sj.load_file(custom_lexicon)
    sj.censor("Python is the best language!").should == "****** is the best language!"
  end

end
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cf-swearjar
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Joshua Hull
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: fuzzyhash
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.0.11
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.0.11
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 0.8.7
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 0.8.7
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 1.3.0
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.3.0
62
+ description: Put another nickel in the swearjar. Simple profanity detection with content
63
+ analysis.
64
+ email: joshbuddy@gmail.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files:
68
+ - README.rdoc
69
+ files:
70
+ - .gitignore
71
+ - Gemfile
72
+ - README.rdoc
73
+ - Rakefile
74
+ - cf-swearjar.gemspec
75
+ - lib/config/en.yml
76
+ - lib/swearjar.rb
77
+ - lib/swearjar/tester.rb
78
+ - lib/swearjar/version.rb
79
+ - spec/data/swear.yml
80
+ - spec/spec.opts
81
+ - spec/spec_helper.rb
82
+ - spec/swearjar_spec.rb
83
+ homepage: http://github.com/joshbuddy/swearjar
84
+ licenses: []
85
+ post_install_message:
86
+ rdoc_options:
87
+ - --charset=UTF-8
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ none: false
92
+ requirements:
93
+ - - ! '>='
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project: swearjar
104
+ rubygems_version: 1.8.24
105
+ signing_key:
106
+ specification_version: 3
107
+ summary: Put another nickel in the swearjar. Simple profanity detection with content
108
+ analysis
109
+ test_files:
110
+ - spec/data/swear.yml
111
+ - spec/spec.opts
112
+ - spec/spec_helper.rb
113
+ - spec/swearjar_spec.rb