ramparts 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/stale.yml +24 -0
- data/.gitignore +1 -0
- data/.rspec +1 -0
- data/.rubocop.yml +54 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.md +18 -0
- data/CONTRIBUTING.md +46 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +52 -0
- data/LICENSE.md +21 -0
- data/README.md +226 -0
- data/ROADMAP.md +21 -0
- data/Rakefile +0 -0
- data/lib/ramparts.rb +3 -0
- data/lib/ramparts/base.rb +99 -0
- data/lib/ramparts/data/list_of_email_domains.rb +73 -0
- data/lib/ramparts/helpers.rb +46 -0
- data/lib/ramparts/parsers/email_parser.rb +88 -0
- data/lib/ramparts/parsers/phone_parser.rb +137 -0
- data/lib/ramparts/parsers/url_parser.rb +30 -0
- data/lib/ramparts/version.rb +5 -0
- data/ramparts.gemspec +24 -0
- data/spec/data/email_and_phone_data/falsy_email_and_phone_data.rb +6 -0
- data/spec/data/email_and_phone_data/truthy_email_and_phone_data.rb +33 -0
- data/spec/data/email_data/falsy_email_data.rb +6 -0
- data/spec/data/email_data/truthy_email_data.rb +87 -0
- data/spec/data/phone_data/falsy_phone_data.rb +6 -0
- data/spec/data/phone_data/truthy_phone_data.rb +109 -0
- data/spec/data/url_data/falsy_url_data.rb +6 -0
- data/spec/data/url_data/truthy_url_data.rb +12 -0
- data/spec/parsers/email_and_phone_parser_spec.rb +44 -0
- data/spec/parsers/email_parser_spec.rb +60 -0
- data/spec/parsers/phone_parser_spec.rb +56 -0
- data/spec/parsers/url_parser_spec.rb +15 -0
- data/spec/spec_constants.rb +3 -0
- data/spec/spec_helper.rb +87 -0
- metadata +147 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../helpers'
|
|
4
|
+
|
|
5
|
+
# Parses text and attempts to find urls
|
|
6
|
+
class UrlParser
  # Keywords whose presence marks a block of text as containing a bad url.
  # Declared ahead of the `private` marker because `private` only affects
  # methods; constants stay publicly reachable regardless of where they sit.
  BAD_URL_REGEX = Regexp.union(/cialis/, /viagra/).freeze

  # Counts how many times a bad-url keyword occurs within the block of text.
  # Matching is case-insensitive because the text is downcased first.
  #
  # @param text [String] the block of text to search
  # @param options [Hash] passed through to the matcher, which currently
  #   ignores it; optional so callers with no options can omit it
  # @return [Integer] the number of keyword occurrences found
  # @raise [ArgumentError] if text is not a String
  def count_url_instances(text, options = {})
    # NOTE(review): ARGUMENT_ERROR_TEXT is presumably provided by ../helpers
    raise ArgumentError, ARGUMENT_ERROR_TEXT unless text.is_a? String

    url_instances(parse_url(text), options).length
  end

  private

  # Normalises the text (downcases it) so the lowercase keywords match
  # regardless of the casing used in the original text.
  def parse_url(text)
    text.downcase
  end

  # Returns one hash per keyword occurrence, in order of appearance:
  # +:offset+ is the character index where the match starts and +:value+
  # is the matched text.
  def url_instances(text, _options)
    instances = []
    text.scan(BAD_URL_REGEX) do
      # scan sets the last match before each yield, giving access to offsets
      match = Regexp.last_match
      instances << { offset: match.begin(0), value: match.to_s.strip }
    end
    instances
  end
end
|
data/ramparts.gemspec
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
|
3
|
+
require 'ramparts/version'
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |s|
|
|
6
|
+
s.name = 'ramparts'
|
|
7
|
+
s.version = Ramparts::VERSION
|
|
8
|
+
s.platform = Gem::Platform::RUBY
|
|
9
|
+
s.authors = ['Brent Scheibelhut', 'CareGuide']
|
|
10
|
+
s.email = ['brent.scheibelhut@careguide.com', 'info@careguide.com']
|
|
11
|
+
s.homepage = 'https://github.com/CareGuide/ramparts'
|
|
12
|
+
s.license = 'MIT'
|
|
13
|
+
s.summary = %q{Parses blocks of text to find phone numbers (including phonetic numbers), emails, and bad url}
|
|
14
|
+
s.description = %q{Parses blocks of text to find phone numbers (including phonetic numbers), emails, and bad url. Useful for finding scammers who tend to try to post their phone number in messages.}
|
|
15
|
+
|
|
16
|
+
s.add_development_dependency 'rspec', '~> 2.5', '>= 2.5.0'
|
|
17
|
+
s.add_development_dependency 'rubocop', '~>0.51.0'
|
|
18
|
+
s.add_development_dependency 'simplecov', '~>0.15.1'
|
|
19
|
+
|
|
20
|
+
s.files = `git ls-files`.split("\n")
|
|
21
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
|
22
|
+
# Split on a real newline: a single-quoted '\n' is a literal backslash + "n" and never matches the git output.
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
|
23
|
+
s.require_paths = ['lib']
|
|
24
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../spec_constants'
|
|
4
|
+
|
|
5
|
+
# Please place phone number answers first for testing
|
|
6
|
+
# This ordering wouldn't matter if we implemented stronger test matchers
|
|
7
|
+
EMAIL_PHONE_TRUTHY_WITH_ANSWERS = [
|
|
8
|
+
{
|
|
9
|
+
matches: ["jbash042@example.com"],
|
|
10
|
+
text: "My name is Cynthia, a friend of mine needs a nanny to watch her baby in your area, her contact is ( jbash042@example.com ) She will be waiting to hear from you kindly send her an email now!",
|
|
11
|
+
filtered: "My name is Cynthia, a friend of mine needs a nanny to watch her baby in your area, her contact is ( #{INSERTABLE} ) She will be waiting to hear from you kindly send her an email now!"
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
matches: ["216.555.FOUR FIVE EIGHT NINE", "jbash042@example.com"],
|
|
15
|
+
text: "My name is Cynthia, a friend of mine needs a nanny to watch her baby in your area, her contact is ( jbash042@example.com ) or 216.555.FOUR FIVE EIGHT NINE!",
|
|
16
|
+
filtered: "My name is Cynthia, a friend of mine needs a nanny to watch her baby in your area, her contact is ( #{INSERTABLE} ) or #{INSERTABLE}!"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
matches: ["432-555-5454", "johnkrueger@example.com"],
|
|
20
|
+
text: "You can contact me at johnkrueger@example.com, or call me at 432-555-5454. Please get in touch.",
|
|
21
|
+
filtered: "You can contact me at #{INSERTABLE}, or call me at #{INSERTABLE}. Please get in touch."
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
matches: ["416-555-5454", "john.krueger@example.com"],
|
|
25
|
+
text: "You can contact me at john.krueger@example.com, or call me at 416-555-5454. Please get in touch.",
|
|
26
|
+
filtered: "You can contact me at #{INSERTABLE}, or call me at #{INSERTABLE}. Please get in touch."
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
matches: ["416-555-5454", "john.krueger@example.com"],
|
|
30
|
+
text: "You can contact me at john.krueger@example.com, or call me at 416-555-5454. Please get in touch.",
|
|
31
|
+
filtered: "You can contact me at #{INSERTABLE}, or call me at #{INSERTABLE}. Please get in touch."
|
|
32
|
+
}
|
|
33
|
+
].freeze
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../spec_constants'
|
|
4
|
+
|
|
5
|
+
EMAIL_TRUTHY_WITH_ANSWERS = [
|
|
6
|
+
{
|
|
7
|
+
matches: ["jbash042@example.com"],
|
|
8
|
+
text: "My name is Cynthia, a friend of mine needs a nanny to watch her baby in your area, her contact is ( jbash042@example.com ) She will be waiting to hear from you kindly send her an email now!",
|
|
9
|
+
filtered: "My name is Cynthia, a friend of mine needs a nanny to watch her baby in your area, her contact is ( #{INSERTABLE} ) She will be waiting to hear from you kindly send her an email now!"
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
matches: ["Virginiak002@example.com"],
|
|
13
|
+
text: "Hello dear. My name is Virginia,I'm currently in Texas but in process of moving to your area,I read your profile on nannylane.com and will like to have you as my sons nanny. Starting from on the 27 of November,9AM-1PM,Mon-Fri or on weekends basis.They are 5&1+ years respectively,i am willing to pay $20/hr am sure these is good for you,just because i want the best care for my sons.Email me your resume/references to (Virginiak002@example.com) for more details,Hope to read from you soon",
|
|
14
|
+
filtered: "Hello dear. My name is Virginia,I'm currently in Texas but in process of moving to your area,I read your profile on nannylane.com and will like to have you as my sons nanny. Starting from on the 27 of November,9AM-1PM,Mon-Fri or on weekends basis.They are 5&1+ years respectively,i am willing to pay $20/hr am sure these is good for you,just because i want the best care for my sons.Email me your resume/references to (#{INSERTABLE}) for more details,Hope to read from you soon"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
matches: ["jbash042@example.com", "flavourjames @ example dot com"],
|
|
18
|
+
text: "My name is Cynthia, a friend of mine needs a nanny to watch her baby in your area, her contact is ( jbash042@example.com ) She will be waiting to hear from you kindly send her an email now! flavourjames @ example dot com",
|
|
19
|
+
filtered: "My name is Cynthia, a friend of mine needs a nanny to watch her baby in your area, her contact is ( #{INSERTABLE} ) She will be waiting to hear from you kindly send her an email now! #{INSERTABLE}"
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
matches: ["ashley72299 @ example dot co dot uk"],
|
|
23
|
+
text: "Hi, Are you seriously interested ..Looking for honest worker .. My e-mail is ashley72299 @ example dot co dot uk . Am available and will like to know the amount you charge per hr ?.. Ashley",
|
|
24
|
+
filtered: "Hi, Are you seriously interested ..Looking for honest worker .. My e-mail is #{INSERTABLE} . Am available and will like to know the amount you charge per hr ?.. Ashley"
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
matches: ["MichaelZachary817 @ example dot com"],
|
|
28
|
+
text: "Hi, Are you a baby sitter? If yes,I am in need of a babysitter/Nanny for my 3 yr old daughter clara,If you are interested, Kindly contact me for more information on MichaelZachary817 @ example dot com for me details of the Job.",
|
|
29
|
+
filtered: "Hi, Are you a baby sitter? If yes,I am in need of a babysitter/Nanny for my 3 yr old daughter clara,If you are interested, Kindly contact me for more information on #{INSERTABLE} for me details of the Job."
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
matches: ["josepamela12000 @ example.com"],
|
|
33
|
+
text: "Hello, My name is Pamela, I read your profile on sitter.com on the possition of nanny, and i would like you to have you in taking care of my son, am always busy due to work issues and other housing issues, this job starting from 30th of November. I'm willing to pay $30/hour, 9am-1pm, Mondays-Thursdays 'or' 9am- 4pm Weekend basis, you can send me an email confirming your interest, or email me with your resume at (josepamela12000 @ example.com) to know your qualification, and your passion for the job. will be glad to work with you, await your email Thanks. Pamela",
|
|
34
|
+
filtered: "Hello, My name is Pamela, I read your profile on sitter.com on the possition of nanny, and i would like you to have you in taking care of my son, am always busy due to work issues and other housing issues, this job starting from 30th of November. I'm willing to pay $30/hour, 9am-1pm, Mondays-Thursdays 'or' 9am- 4pm Weekend basis, you can send me an email confirming your interest, or email me with your resume at (#{INSERTABLE}) to know your qualification, and your passion for the job. will be glad to work with you, await your email Thanks. Pamela"
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
matches: ["flavorjames0022 @ example DOT com"],
|
|
38
|
+
text: "I am moving down to your town and i need responsible pet sitting service for my American bulldog,Plz i want you to just email me now to get more details from me with days hours and weekly payment and my email is [flavorjames0022 @ example DOT com ]i want you to make sure you email me with your callphone number and i will be paying 300 bucks weeky.",
|
|
39
|
+
filtered: "I am moving down to your town and i need responsible pet sitting service for my American bulldog,Plz i want you to just email me now to get more details from me with days hours and weekly payment and my email is [#{INSERTABLE} ]i want you to make sure you email me with your callphone number and i will be paying 300 bucks weeky."
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
matches: ["emmalineflouress1104 @ example DOT com"],
|
|
43
|
+
text: "I am moving down to your town and I need responsible pet sitter service for my American bulldog ,plz I want you to just email me now to get more details from me with days hours and weekly payment and my email is(emmalineflouress1104 @ example DOT com ) I want you to make sure you email me with your cellphone number and I will be paying $300 weekly.Flores.",
|
|
44
|
+
filtered: "I am moving down to your town and I need responsible pet sitter service for my American bulldog ,plz I want you to just email me now to get more details from me with days hours and weekly payment and my email is(#{INSERTABLE} ) I want you to make sure you email me with your cellphone number and I will be paying $300 weekly.Flores."
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
matches: ["rosefale @ example dot com"],
|
|
48
|
+
text: "Hi Felicia! I saw your profile and I'm looking for someone that will care for my son David and make him happy, he's 9 years old lovely and fun to be with, i will offer $20 per hr. We are responsible and easy going family.Send your resume or your availability to my email address so that i can tell you more about what i want for my son. E-mail me on rosefale @ example dot com",
|
|
49
|
+
filtered: "Hi Felicia! I saw your profile and I'm looking for someone that will care for my son David and make him happy, he's 9 years old lovely and fun to be with, i will offer $20 per hr. We are responsible and easy going family.Send your resume or your availability to my email address so that i can tell you more about what i want for my son. E-mail me on #{INSERTABLE}"
|
|
50
|
+
}
|
|
51
|
+
].freeze
|
|
52
|
+
|
|
53
|
+
EMAIL_TRUTHY_AT = [
|
|
54
|
+
{
|
|
55
|
+
matches: ["josepakela12000 at example.com"],
|
|
56
|
+
text: "Hello, My name is Pamela, I read your profile on sitter.com on the possition of nanny, and i would like you to have you in taking care of my son, am always busy due to work issues and other housing issues, this job starting from 30th of November. I'm willing to pay $30/hour, 9am-1pm, Mondays-Thursdays 'or' 9am- 4pm Weekend basis, you can send me an email confirming your interest, or email me with your resume at (josepakela12000 at example.com) to know your qualification, and your passion for the job. will be glad to work with you, await your email Thanks. Pamela",
|
|
57
|
+
filtered: "Hello, My name is Pamela, I read your profile on sitter.com on the possition of nanny, and i would like you to have you in taking care of my son, am always busy due to work issues and other housing issues, this job starting from 30th of November. I'm willing to pay $30/hour, 9am-1pm, Mondays-Thursdays 'or' 9am- 4pm Weekend basis, you can send me an email confirming your interest, or email me with your resume at (#{INSERTABLE}) to know your qualification, and your passion for the job. will be glad to work with you, await your email Thanks. Pamela"
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
matches: ["flavorjames0022 AT example DOT com"],
|
|
61
|
+
text: "I am moving down to your town and i need responsible pet sitting service for my American bulldog,Plz i want you to just email me now to get more details from me with days hours and weekly payment and my email is [flavorjames0022 AT example DOT com ]i want you to make sure you email me with your callphone number and i will be paying 300 bucks weeky.",
|
|
62
|
+
filtered: "I am moving down to your town and i need responsible pet sitting service for my American bulldog,Plz i want you to just email me now to get more details from me with days hours and weekly payment and my email is [#{INSERTABLE} ]i want you to make sure you email me with your callphone number and i will be paying 300 bucks weeky."
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
matches: ["emmalineflouress1104 AT example DOT com"],
|
|
66
|
+
text: "I am moving down to your town and I need responsible pet sitter service for my American bulldog ,plz I want you to just email me now to get more details from me with days hours and weekly payment and my email is(emmalineflouress1104 AT example DOT com ) I want you to make sure you email me with your cellphone number and I will be paying $300 weekly.Flores.",
|
|
67
|
+
filtered: "I am moving down to your town and I need responsible pet sitter service for my American bulldog ,plz I want you to just email me now to get more details from me with days hours and weekly payment and my email is(#{INSERTABLE} ) I want you to make sure you email me with your cellphone number and I will be paying $300 weekly.Flores."
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
matches: ["ashley72299 AT example dot co dot uk"],
|
|
71
|
+
text: "Hi, Are you seriously interested ..Looking for honest worker .. My e-mail is ashley72299 AT example dot co dot uk . Am available and will like to know the amount you charge per hr ?.. Ashley",
|
|
72
|
+
filtered: "Hi, Are you seriously interested ..Looking for honest worker .. My e-mail is #{INSERTABLE} . Am available and will like to know the amount you charge per hr ?.. Ashley"
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
matches: ["MichaelZachary817 at example dot com"],
|
|
76
|
+
text: "Hi, Are you a baby sitter? If yes,I am in need of a babysitter/Nanny for my 3 yr old daughter clara,If you are interested, Kindly contact me for more information on MichaelZachary817 at example dot com for me details of the Job.",
|
|
77
|
+
filtered: "Hi, Are you a baby sitter? If yes,I am in need of a babysitter/Nanny for my 3 yr old daughter clara,If you are interested, Kindly contact me for more information on #{INSERTABLE} for me details of the Job."
|
|
78
|
+
}
|
|
79
|
+
].freeze
|
|
80
|
+
|
|
81
|
+
EMAIL_TRUTHY_AGGRESSIVE = [
|
|
82
|
+
{
|
|
83
|
+
matches: ["w i l l h o l d 1 1 (at) example"],
|
|
84
|
+
text: "Hello Detra. I saw that you are looking for a home owner. I think that we would be a good match. shoot me a message at w i l l h o l d 1 1 (at) example",
|
|
85
|
+
filtered: "Hello Detra. I saw that you are looking for a home owner. I think that we would be a good match. shoot me a message at #{INSERTABLE}"
|
|
86
|
+
}
|
|
87
|
+
].freeze
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../spec_constants'
|
|
4
|
+
|
|
5
|
+
PHONE_TRUTHY_WITH_ANSWERS_AND_SPACES = [
|
|
6
|
+
{
|
|
7
|
+
matches: ["f i v e - ( F I V E . F I V E . 5 . E I G H T . n i N E . F O U R . T W O . E I G H T . SIX . FIVE"],
|
|
8
|
+
text: "If you're interested in this position, do contact me directly on my phone number f i v e - ( F I V E . F I V E . 5 . E I G H T . n i N E . F O U R . T W O . E I G H T . SIX . FIVE ).",
|
|
9
|
+
filtered: "If you're interested in this position, do contact me directly on my phone number #{INSERTABLE})."
|
|
10
|
+
}
|
|
11
|
+
].freeze
|
|
12
|
+
|
|
13
|
+
PHONE_TRUTHY_WITH_ANSWERS = [
|
|
14
|
+
{
|
|
15
|
+
matches: ["5.5.5.4.3.8.4.8.3.8"],
|
|
16
|
+
text: "I need a babysitter and errand for my son textme direct on my number if you are interested 5.5.5.4.3.8.4.8.3.8",
|
|
17
|
+
filtered: "I need a babysitter and errand for my son textme direct on my number if you are interested #{INSERTABLE}"
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
matches: ["2545005290"],
|
|
21
|
+
text: "Aron's phone number is 2545005290",
|
|
22
|
+
filtered: "Aron's phone number is #{INSERTABLE}"
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
matches: ["three four seven five 5 five five one zero two"],
|
|
26
|
+
text: "i Rebecca! I saw your profile and would like to know more. my name is Adriana Medrano, from TIDY N' CLEAN CLEANING SERVICES LLC, this message regarding your request for housekeeping services, I'm a very responsable and honest house cleaner, let me help you clean your house. please call or text me at three four seven five 5 five five one zero two, I'm looking forward to serve you",
|
|
27
|
+
filtered: "i Rebecca! I saw your profile and would like to know more. my name is Adriana Medrano, from TIDY N' CLEAN CLEANING SERVICES LLC, this message regarding your request for housekeeping services, I'm a very responsable and honest house cleaner, let me help you clean your house. please call or text me at #{INSERTABLE}, I'm looking forward to serve you"
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
matches: ["(890) 555-4270"],
|
|
31
|
+
text: "Hi Felicia! I saw that you are looking for a family. Would you like to chat sometime soon?(890) 555-4270",
|
|
32
|
+
filtered: "Hi Felicia! I saw that you are looking for a family. Would you like to chat sometime soon?#{INSERTABLE}"
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
matches: ["FOUR ONE FIVE FIVE FIVE FIVE TWO EIGHT SIX FIVE"],
|
|
36
|
+
text: "If you're interested in this position, do contact me directly on my phone number ( FOUR ONE FIVE FIVE FIVE FIVE TWO EIGHT SIX FIVE ). Hope you cracked that number code.",
|
|
37
|
+
filtered: "If you're interested in this position, do contact me directly on my phone number ( #{INSERTABLE} ). Hope you cracked that number code."
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
matches: ["FOUR ONE FIVE FIVE FIVE FIVE TWO EIGHT SIX FIVE", "FOUR ONE FIVE FIVE FIVE FIVE TWO EIGHT SIX FIVE"],
|
|
41
|
+
text: "If you're interested in this position, do contact me directly on my phone number ( FOUR ONE FIVE FIVE FIVE FIVE TWO EIGHT SIX FIVE ). Hope you cracked that number code. FOUR ONE FIVE FIVE FIVE FIVE TWO EIGHT SIX FIVE",
|
|
42
|
+
filtered: "If you're interested in this position, do contact me directly on my phone number ( #{INSERTABLE} ). Hope you cracked that number code. #{INSERTABLE}"
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
matches: ["(1THREE4) 555 FOUR OH FIVE 8/9.TWO.1"],
|
|
46
|
+
text: "(1THREE4) 555 FOUR OH FIVE 8/9.TWO.1",
|
|
47
|
+
filtered: INSERTABLE
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
matches: ["two,zero,nine,five,five,five,eight,nine,four,seven"],
|
|
51
|
+
text: "Hi there, I hope you’re having a great week. I am quite impressed with your profile on here and would like to know more. I am looking to employ an experienced, loyal, dedicated, caring and responsible person to look after my house due to my very busy schedule with work. If you're interested in this position, do contact me directly on my phone number ( two,zero,nine,five,five,five,eight,nine,four,seven). Hope you cracked that number code. I'd prefer you text first with an introduction then I can give you a call back. Have a great day",
|
|
52
|
+
filtered: "Hi there, I hope you’re having a great week. I am quite impressed with your profile on here and would like to know more. I am looking to employ an experienced, loyal, dedicated, caring and responsible person to look after my house due to my very busy schedule with work. If you're interested in this position, do contact me directly on my phone number ( #{INSERTABLE}). Hope you cracked that number code. I'd prefer you text first with an introduction then I can give you a call back. Have a great day"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
matches: ["six.one.nine.five.five.five.five.seven.four.two"],
|
|
56
|
+
text: "Hello how are you doing today? i'm Andrea, we are moving to your area and i would need a baby sitter and errands, at your convenient time kindly text me at six.one.nine.five.five.five.five.seven.four.two",
|
|
57
|
+
filtered: "Hello how are you doing today? i'm Andrea, we are moving to your area and i would need a baby sitter and errands, at your convenient time kindly text me at #{INSERTABLE}"
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
matches: ["Five. four. nine. five. five. five. Two. nine. zero. three"],
|
|
61
|
+
text: "Hello, My name is Mary, We are moving to your area.I need a baby sitter service for my 6 years old son also can run an errand at your convenience time... I will like to hear back from you and here is my number..Five. four. nine. five. five. five. Two. nine. zero. three. ..I will be waiting for your text if you are interested.Thanks",
|
|
62
|
+
filtered: "Hello, My name is Mary, We are moving to your area.I need a baby sitter service for my 6 years old son also can run an errand at your convenience time... I will like to hear back from you and here is my number..#{INSERTABLE}. ..I will be waiting for your text if you are interested.Thanks"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
matches: ["2.1.2.Five.Five.FIve.7.2.0.4"],
|
|
66
|
+
text: "Hello this is tiara happy sunday am just new on this site i need a serious and God fearing person sitter for my 6yrs old son am moving to your neighbourhood if you are interested in working with me kindly get back to me asap! contact me direct on my digit 2.1.2.Five.Five.FIve.7.2.0.4",
|
|
67
|
+
filtered: "Hello this is tiara happy sunday am just new on this site i need a serious and God fearing person sitter for my 6yrs old son am moving to your neighbourhood if you are interested in working with me kindly get back to me asap! contact me direct on my digit #{INSERTABLE}"
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
matches: ["5745551712"],
|
|
71
|
+
text: "Hey, have family forever here at 1160 Marion St. Niles, MI 45920 and ph 5745551712, yet on road behind Mc Donalds at white house two bedroom washer dryer stove fridge an stay of creatures mostly invisible no one at home usually but me kids just play cause heavenly delight anywhere door to front open back door one always a fenced yard and shed any questions drive by or come on in my home is always yours just open door ask anything seek anything knock at any idea you choose. you would be the boss of all that is my children's and as a nanny I assure you they will listen o call not",
|
|
72
|
+
filtered: "Hey, have family forever here at 1160 Marion St. Niles, MI 45920 and ph #{INSERTABLE}, yet on road behind Mc Donalds at white house two bedroom washer dryer stove fridge an stay of creatures mostly invisible no one at home usually but me kids just play cause heavenly delight anywhere door to front open back door one always a fenced yard and shed any questions drive by or come on in my home is always yours just open door ask anything seek anything knock at any idea you choose. you would be the boss of all that is my children's and as a nanny I assure you they will listen o call not"
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
matches: ["seven zero three five five five zero seven nine three"],
|
|
76
|
+
text: "Hi, I'm looking for housekeeper to clean up apartment. You can text me at seven zero three five five five zero seven nine three for more details. Danny",
|
|
77
|
+
filtered: "Hi, I'm looking for housekeeper to clean up apartment. You can text me at #{INSERTABLE} for more details. Danny"
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
matches: ["(609) 555-2092"],
|
|
81
|
+
text: "Hey Anna! Have you found a family yet? I think that we would be a good match.here is my contact for easy communication (609) 555-2092",
|
|
82
|
+
filtered: "Hey Anna! Have you found a family yet? I think that we would be a good match.here is my contact for easy communication #{INSERTABLE}"
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
matches: ["778-555-2900"],
|
|
86
|
+
text: "1/10082 Williams RD Chilliwack BC V2P-5H2 Bradley Mayo Called me 778-555-2900 Yes Bradley Mayo Home by myself Girl with latex gloves",
|
|
87
|
+
filtered: "1/10082 Williams RD Chilliwack BC V2P-5H2 Bradley Mayo Called me #{INSERTABLE} Yes Bradley Mayo Home by myself Girl with latex gloves"
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
matches: ["4.0.4.Five.Five,Five.6.4.0.8"],
|
|
91
|
+
text: "Hello this is Mary seeking for sitter to babysit my son if you interested kind get back to me asap!contact me direct on my digit number 4.0.4.Five.Five,Five.6.4.0.8",
|
|
92
|
+
filtered: "Hello this is Mary seeking for sitter to babysit my son if you interested kind get back to me asap!contact me direct on my digit number #{INSERTABLE}"
|
|
93
|
+
},
|
|
94
|
+
{
|
|
95
|
+
matches: ["THREE, ONE, TWO, FIVE, FIVE, FIVE, SIX, TWO, FIVE, FOUR"],
|
|
96
|
+
text: "Hi there, I hope you’re having a great week. I am quite impressed with your profile on here and would like to know more. I am looking to employ an experienced, loyal, dedicated, caring and responsible nanny to look after my kids due to my very busy schedule with work. If you're interested in this position, do contact me directly on my phone number( THREE, ONE, TWO, FIVE, FIVE, FIVE, SIX, TWO, FIVE, FOUR). Hope you cracked that number code. I'd prefer you text first with an introduction then I can give you a call back. Have a great day ahead",
|
|
97
|
+
filtered: "Hi there, I hope you’re having a great week. I am quite impressed with your profile on here and would like to know more. I am looking to employ an experienced, loyal, dedicated, caring and responsible nanny to look after my kids due to my very busy schedule with work. If you're interested in this position, do contact me directly on my phone number( #{INSERTABLE}). Hope you cracked that number code. I'd prefer you text first with an introduction then I can give you a call back. Have a great day ahead"
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
matches: ["213...555... 5283"],
|
|
101
|
+
text: "Hi there, Hope you had a great weekend. I am impressed with your profile on here and would like to know more about you. I am looking to employ an experienced,loyal, dedicated, caring and responsible person to look after my dogs due to my very busy schedule with work. If you're interested in this position, do contact me directly on my phone number 213...555... 5283. Hope you cracked that number code. I'd prefer you text first with an introduction then I can give you a call back. Have a great day ahead. tanosha",
|
|
102
|
+
filtered: "Hi there, Hope you had a great weekend. I am impressed with your profile on here and would like to know more about you. I am looking to employ an experienced,loyal, dedicated, caring and responsible person to look after my dogs due to my very busy schedule with work. If you're interested in this position, do contact me directly on my phone number #{INSERTABLE}. Hope you cracked that number code. I'd prefer you text first with an introduction then I can give you a call back. Have a great day ahead. tanosha"
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
matches: ["213........555......... 5283"],
|
|
106
|
+
text: "Hi there, Hope you had a great weekend. I am impressed with your profile on here and would like to know more about you. I am looking to employ an experienced,loyal, dedicated, caring and responsible person to look after my dogs due to my very busy schedule with work. If you're interested in this position, do contact me directly on my phone number 213........555......... 5283. Hope you cracked that number code. I'd prefer you text first with an introduction then I can give you a call back. Have a great day ahead. tanosha",
|
|
107
|
+
filtered: "Hi there, Hope you had a great weekend. I am impressed with your profile on here and would like to know more about you. I am looking to employ an experienced,loyal, dedicated, caring and responsible person to look after my dogs due to my very busy schedule with work. If you're interested in this position, do contact me directly on my phone number #{INSERTABLE}. Hope you cracked that number code. I'd prefer you text first with an introduction then I can give you a call back. Have a great day ahead. tanosha"
|
|
108
|
+
}
|
|
109
|
+
].freeze
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
URL_TRUTHY = [
|
|
4
|
+
{
|
|
5
|
+
matches: ["cialis", "cialis", "cialis", "cialis", "cialis", "cialis", "cialis"],
|
|
6
|
+
text: "cialis vs cialis spam guestbook.php?action=http://cialiswalmart.shop - cialis over the counter at walmart trimix for ed cialis over the counter at walmart- cialis from canada pharmacy this page has had cialis dongguk gallery"
|
|
7
|
+
},
|
|
8
|
+
{
|
|
9
|
+
matches: ["viagra", "viagra", "viagra", "viagra", "viagra", "viagra", "viagra", "viagra"],
|
|
10
|
+
text: "risk viagra http://viagrawithoutdoctorusa.net - viagra without a doctor prescription usa viagra tadalafil gaestebuch.htmlviagra without a doctor prescription viagra you cannot reply to topics in this forum http:juki.host-page.com4083buy viagra generic and brand.htmldiscount-viagra"
|
|
11
|
+
}
|
|
12
|
+
].freeze
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../data/email_and_phone_data/truthy_email_and_phone_data'
|
|
4
|
+
require_relative '../data/email_and_phone_data/falsy_email_and_phone_data'
|
|
5
|
+
|
|
6
|
+
# Examples for the combined phone-number and email counter. Both examples pass
# the method named in the describe header to the shared count helpers.
describe '#count_phone_numbers_and_emails' do
  it 'parses a number of positive test blocks correctly' do
    test_truthy_count(EMAIL_PHONE_TRUTHY_WITH_ANSWERS, :count_phone_numbers_and_emails)
  end

  it 'parses a number of negative test blocks correctly' do
    # This previously passed the phone-only :count_phone_numbers, so the
    # combined method was never run against the falsy blocks.
    test_falsy_count(EMAIL_PHONE_FALSY_BLOCKS, :count_phone_numbers_and_emails)
  end
end
|
|
15
|
+
|
|
16
|
+
# Examples for the combined phone-number and email finder: positive blocks
# (with and without the compare option) and negative blocks.
describe '#find_phone_numbers_and_emails' do
  finder = :find_phone_numbers_and_emails

  it 'parses a number of positive test blocks correctly with multi method' do
    test_truthy_finds(EMAIL_PHONE_TRUTHY_WITH_ANSWERS, finder)
  end

  it 'parses a number of positive test blocks correctly with multi method and compare option' do
    test_truthy_finds(EMAIL_PHONE_TRUTHY_WITH_ANSWERS, finder, compare: true)
  end

  it 'parses a number of negative test blocks correctly with multi method' do
    test_falsy_finds(EMAIL_PHONE_FALSY_BLOCKS, finder)
  end
end
|
|
29
|
+
|
|
30
|
+
# Examples for the combined phone-number and email replacer, run against the
# positive blocks with and without the compare option.
describe '#replace_phone_numbers_and_emails' do
  replacer = :replace_phone_numbers_and_emails

  it 'replaces a number of positive test blocks correctly with multi method' do
    test_replacements(EMAIL_PHONE_TRUTHY_WITH_ANSWERS, replacer)
  end

  it 'replaces a number of positive test blocks correctly with multi method and compare option' do
    test_replacements(EMAIL_PHONE_TRUTHY_WITH_ANSWERS, replacer, compare: true)
  end
end
|
|
39
|
+
|
|
40
|
+
# Hands the combined count and find methods to the run-time comparison helper,
# using the positive email-and-phone blocks as input.
describe 'Map/Reduce to Regex Run Time' do
  counter = :count_phone_numbers_and_emails
  finder = :find_phone_numbers_and_emails

  it 'times the two methods against each other' do
    compare_run_times(EMAIL_PHONE_TRUTHY_WITH_ANSWERS, counter, finder)
  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../data/email_data/truthy_email_data'
|
|
4
|
+
require_relative '../data/email_data/falsy_email_data'
|
|
5
|
+
|
|
6
|
+
# Examples for the email counter: the default positive blocks (plain and with
# the aggressive option), the check_for_at and aggressive fixture sets with
# their matching options, and the negative blocks.
describe '#count_emails' do
  counter = :count_emails

  it 'parses a number of positive test blocks correctly' do
    test_truthy_count(EMAIL_TRUTHY_WITH_ANSWERS, counter)
  end

  it 'parses a number of positive test blocks correctly with aggressive option' do
    test_truthy_count(EMAIL_TRUTHY_WITH_ANSWERS, counter, aggressive: true)
  end

  it 'parses a number of positive aggressive test blocks correctly with check_for_at option' do
    test_truthy_count(EMAIL_TRUTHY_AT, counter, check_for_at: true)
  end

  it 'parses a number of positive aggressive test blocks correctly with aggressive option' do
    test_truthy_count(EMAIL_TRUTHY_AGGRESSIVE, counter, aggressive: true)
  end

  it 'parses a number of negative test blocks correctly' do
    test_falsy_count(EMAIL_FALSY_BLOCKS, counter)
  end
end
|
|
27
|
+
|
|
28
|
+
# Examples for the email finder: default positive blocks, the check_for_at
# fixture set with its option, and the negative blocks.
describe '#find_emails' do
  finder = :find_emails

  it 'finds a number of positive test blocks correctly' do
    test_truthy_finds(EMAIL_TRUTHY_WITH_ANSWERS, finder)
  end

  it 'finds a number of positive aggressive test blocks correctly with check_for_at option' do
    test_truthy_finds(EMAIL_TRUTHY_AT, finder, check_for_at: true)
  end

  # TODO: Update algorithm to be able to pass test with EMAIL_TRUTHY_AGGRESSIVE

  it 'finds a number of negative test blocks correctly and regex only' do
    test_falsy_finds(EMAIL_FALSY_BLOCKS, finder)
  end
end
|
|
43
|
+
|
|
44
|
+
# Examples for the email replacer: default positive blocks and the
# check_for_at fixture set with its option.
describe '#replace_emails' do
  replacer = :replace_emails

  it 'replaces a number of positive test blocks correctly with email replacer' do
    test_replacements(EMAIL_TRUTHY_WITH_ANSWERS, replacer)
  end

  it 'replaces a number of positive aggressive test blocks correctly with check_for_at option' do
    test_replacements(EMAIL_TRUTHY_AT, replacer, check_for_at: true)
  end

  # TODO: Update algorithm to be able to pass test with EMAIL_TRUTHY_AGGRESSIVE
end
|
|
55
|
+
|
|
56
|
+
# Hands the email count and find methods to the run-time comparison helper,
# using the positive email blocks as input.
describe 'Map/Reduce to Regex Run Time' do
  counter = :count_emails
  finder = :find_emails

  it 'times the two methods against each other' do
    compare_run_times(EMAIL_TRUTHY_WITH_ANSWERS, counter, finder)
  end
end
|