disavow_tool 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -1
- data/bin/disavow_tool_dev +4 -0
- data/disavow_tool.gemspec +1 -0
- data/lib/disavow_tool.rb +5 -2
- data/lib/disavow_tool/command_options.rb +9 -3
- data/lib/disavow_tool/config.rb +1 -0
- data/lib/disavow_tool/disavow_list.rb +11 -79
- data/lib/disavow_tool/domain_and_url.rb +93 -0
- data/lib/disavow_tool/imported_links.rb +56 -6
- data/lib/disavow_tool/list.rb +10 -3
- data/lib/disavow_tool/version.rb +1 -1
- data/lib/disavow_tool/white_list.rb +20 -14
- data/samples/disavowed2.cvs +1460 -0
- data/samples/whitelist_urls.csv +2 -519
- data/samples/whitelist_urls2.csv +519 -0
- metadata +20 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8db2e508792ce319b2f11c7e4dd43a3e7d31b875a0c3d80aa97828b600e34e96
|
|
4
|
+
data.tar.gz: 868088592ece6710d0326ce9b586c79e072f083cb9bdc2060998c1649552bc87
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a7e6e2ab72198469ff1ec3cfa68497fbefcfec8eba7746ad1c2e17a087a045f2a8c1539de12642c04841256e83958095a20d0c0bc06b625940baf52c796b0546
|
|
7
|
+
data.tar.gz: a4da05637f583bf94cd3b4eb83c234297adf18954339757529e8ace255a008bf6674ed2fa2c80184dcf1002fc624b31695290b7161cfa304e18b58ce74901776
|
data/Gemfile.lock
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
disavow_tool (0.1.1)
|
|
4
|
+
disavow_tool (0.3.0)
|
|
5
5
|
activesupport
|
|
6
6
|
colorize
|
|
7
|
+
nokogiri
|
|
7
8
|
|
|
8
9
|
GEM
|
|
9
10
|
remote: https://rubygems.org/
|
|
@@ -18,7 +19,10 @@ GEM
|
|
|
18
19
|
diff-lcs (1.3)
|
|
19
20
|
i18n (1.6.0)
|
|
20
21
|
concurrent-ruby (~> 1.0)
|
|
22
|
+
mini_portile2 (2.4.0)
|
|
21
23
|
minitest (5.11.3)
|
|
24
|
+
nokogiri (1.10.3)
|
|
25
|
+
mini_portile2 (~> 2.4.0)
|
|
22
26
|
rake (10.5.0)
|
|
23
27
|
rspec (3.8.0)
|
|
24
28
|
rspec-core (~> 3.8.0)
|
data/disavow_tool.gemspec
CHANGED
data/lib/disavow_tool.rb
CHANGED
|
@@ -11,8 +11,8 @@ require_relative 'disavow_tool/white_list.rb'
|
|
|
11
11
|
require_relative 'disavow_tool/imported_links.rb'
|
|
12
12
|
|
|
13
13
|
module DisavowTool
|
|
14
|
-
def self.run
|
|
15
14
|
|
|
15
|
+
def self.run
|
|
16
16
|
puts "Importing new links".blue if OPTIONS.verbose
|
|
17
17
|
imported_links = ImportedLinks.new
|
|
18
18
|
|
|
@@ -23,7 +23,10 @@ module DisavowTool
|
|
|
23
23
|
puts "Importing Whitelist links".blue if OPTIONS.verbose
|
|
24
24
|
white_list = WhiteList.new
|
|
25
25
|
puts "Cleagning imported links already in whitelist".blue if OPTIONS.verbose
|
|
26
|
-
imported_links.remove_known_links(white_list)
|
|
26
|
+
imported_links.remove_known_links(white_list.links)
|
|
27
|
+
puts "Cleagning imported links with a domain existingin in whitelist".blue if OPTIONS.verbose
|
|
28
|
+
p white_list.domains
|
|
29
|
+
imported_links.remove_known_links_for_domain(white_list.domains)
|
|
27
30
|
end
|
|
28
31
|
|
|
29
32
|
|
|
@@ -13,13 +13,14 @@ module DisavowTool
|
|
|
13
13
|
options.whitelist = false
|
|
14
14
|
options.verbose = false
|
|
15
15
|
options.hardcore_verbose = false
|
|
16
|
+
options.network_requests = true
|
|
16
17
|
|
|
17
18
|
opt_parser = OptionParser.new do |opts|
|
|
18
19
|
opts.banner = "Usage: disavow.rb [options] --disavow FILE --import file_1,file_2,file_3 [--whitelist file1,file2,file3]"
|
|
19
20
|
opts.separator ""
|
|
20
21
|
opts.separator "Requited options:"
|
|
21
|
-
opts.on("-d","--disavow
|
|
22
|
-
options.
|
|
22
|
+
opts.on("-d","--disavow file_1,file_2", Array, "Disavow files as exported from Google Search Console") do |file|
|
|
23
|
+
options.disavow_files = file
|
|
23
24
|
end
|
|
24
25
|
opts.on("-i","--import file_1,file_2", Array, "List of URLS to analyse. The file must have one URL per line") do |file|
|
|
25
26
|
options.import_files = file
|
|
@@ -36,6 +37,11 @@ module DisavowTool
|
|
|
36
37
|
opts.on("-v", "--verbose", "Vervose mode") do
|
|
37
38
|
options.verbose = true
|
|
38
39
|
end
|
|
40
|
+
|
|
41
|
+
opts.on("-t", "--no-titles", "Don't request tittles from websites thus making the command faster") do
|
|
42
|
+
options.network_requests = false
|
|
43
|
+
end
|
|
44
|
+
|
|
39
45
|
opts.on("-V", "--hardcore-vervose", "Print out even your mama") do
|
|
40
46
|
options.hardcore_verbose = true
|
|
41
47
|
options.verbose = true # Hardcose verbose includes regular verbose
|
|
@@ -60,7 +66,7 @@ module DisavowTool
|
|
|
60
66
|
end
|
|
61
67
|
|
|
62
68
|
def check_arguments(options)
|
|
63
|
-
raise "You must to specify one disallow file" if options.
|
|
69
|
+
raise "You must to specify one disallow file" if options.disavow_files.blank?
|
|
64
70
|
raise "You must to specify one import file" if options.import_files.blank?
|
|
65
71
|
if options.whitelist
|
|
66
72
|
raise "You need to specify at least one white list file" if options.whitelist_files.blank?
|
data/lib/disavow_tool/config.rb
CHANGED
|
@@ -1,101 +1,33 @@
|
|
|
1
1
|
require_relative 'list'
|
|
2
|
+
require_relative 'domain_and_url'
|
|
2
3
|
require 'uri'
|
|
3
4
|
|
|
4
5
|
module DisavowTool
|
|
5
6
|
class DisavowList < List
|
|
6
|
-
|
|
7
|
-
alias_method :links, :disavowed_links
|
|
8
|
-
alias_method :domains, :disavowed_domains
|
|
7
|
+
include DomainAndUrl
|
|
9
8
|
|
|
10
|
-
def initialize(
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@disavowed_links = Set.new
|
|
14
|
-
super(import_file)
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def clean_line!(line)
|
|
18
|
-
case domain_or_url(line)
|
|
19
|
-
when :domain
|
|
20
|
-
@disavowed_domains << remove_domain_prefix(line)
|
|
21
|
-
when :url
|
|
22
|
-
@disavowed_links << line
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def finished_import_hook
|
|
27
|
-
@original_disavowed_domains = @disavowed_domains.clone
|
|
28
|
-
@original_disavowed_links = @disavowed_links.clone
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
def add_domain(domain)
|
|
32
|
-
super(domain, @disavowed_domains)
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def add_url(url, list=nil)
|
|
36
|
-
super(url, @disavowed_links)
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def add_domain_from_url(url)
|
|
40
|
-
domain = URI.parse(URI.escape(url)).host
|
|
41
|
-
add_domain(domain)
|
|
42
|
-
p domain
|
|
43
|
-
return domain
|
|
9
|
+
def initialize(import_files=nil)
|
|
10
|
+
import_files = import_files || OPTIONS.disavow_files
|
|
11
|
+
super(import_files)
|
|
44
12
|
end
|
|
45
13
|
|
|
46
14
|
def import_message(domain)
|
|
47
15
|
"Importing #{is_url?(domain).to_s} #{remove_domain_prefix(domain)} into Disavow list"
|
|
48
16
|
end
|
|
49
17
|
def add_url_message(url)
|
|
50
|
-
"+++ Inserting #{is_url?(url).to_s} #{url} in Disavow"
|
|
18
|
+
"+++ Inserting #{is_url?(url).to_s} #{url.on_yellow} in Disavow"
|
|
51
19
|
end
|
|
52
20
|
def message_sumary_imported; "Disavowed elements imported" end
|
|
53
21
|
def mensaje_sumary_before_export; "Disavow elements before exporting" end
|
|
54
22
|
|
|
55
|
-
def
|
|
56
|
-
|
|
57
|
-
super(@disavowed_links, @original_disavowed_links)
|
|
58
|
-
puts "Disavowed Domains:".light_blue
|
|
59
|
-
super(@disavowed_domains, @original_disavowed_domains)
|
|
60
|
-
end
|
|
23
|
+
def message_sumary_links_imported; "Disavowed URLs:" end
|
|
24
|
+
def message_sumary_domains_imported; "Disavowed Domains:" end
|
|
61
25
|
|
|
62
26
|
def export_write(file)
|
|
63
|
-
file.puts "#
|
|
64
|
-
file
|
|
65
|
-
puts "Writing #{
|
|
66
|
-
file.puts "# urls"
|
|
67
|
-
file.puts @disavowed_links.to_a
|
|
68
|
-
puts "Writing #{@disavowed_links.count} Disavowed URLS".blue if @verbose
|
|
69
|
-
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
:private
|
|
73
|
-
def domain_or_url(line)
|
|
74
|
-
if( /^domain/.match(line))
|
|
75
|
-
return :domain
|
|
76
|
-
elsif( /^http/.match(line) )
|
|
77
|
-
return :url
|
|
78
|
-
else
|
|
79
|
-
raise "Error parsing Disavow file"
|
|
80
|
-
end
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
def is_url?(link)
|
|
84
|
-
if( link.match(/^http(s)?\:/) )
|
|
85
|
-
:url
|
|
86
|
-
else
|
|
87
|
-
:domain
|
|
88
|
-
end
|
|
27
|
+
file.puts "# Disavow"
|
|
28
|
+
super(file)
|
|
29
|
+
puts "Writing #{total_elements} elements into the Disavow file".blue if @verbose
|
|
89
30
|
end
|
|
90
31
|
|
|
91
|
-
def remove_domain_prefix(domain)
|
|
92
|
-
domain.gsub(/^domain\:/, '')
|
|
93
|
-
end
|
|
94
|
-
def remove_domain_prefix!(domain)
|
|
95
|
-
domain.gsub!(/^domain\:/, '')
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def add_domain_prefix
|
|
99
|
-
end
|
|
100
32
|
end
|
|
101
33
|
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
module DisavowTool
|
|
2
|
+
module DomainAndUrl
|
|
3
|
+
attr_accessor :links, :domains
|
|
4
|
+
|
|
5
|
+
def initialize(import_files)
|
|
6
|
+
@domains = Set.new
|
|
7
|
+
@links = Set.new
|
|
8
|
+
p "DomainAndUrl"
|
|
9
|
+
super(import_files)
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def clean_line!(line)
|
|
13
|
+
case domain_or_url(line)
|
|
14
|
+
when :domain
|
|
15
|
+
@domains << remove_domain_prefix(line)
|
|
16
|
+
when :url
|
|
17
|
+
@links << line
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def finished_import_hook
|
|
22
|
+
@original_domains = @domains.clone
|
|
23
|
+
@original_links = @links.clone
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def add_domain(domain)
|
|
27
|
+
super(domain, @domains)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def add_url(url, list=nil)
|
|
31
|
+
super(url, @links)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def add_domain_from_url(url)
|
|
35
|
+
domain = URI.parse(URI.escape(url)).host
|
|
36
|
+
add_domain(domain)
|
|
37
|
+
p domain
|
|
38
|
+
return domain
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def export_write(file)
|
|
42
|
+
file.puts "# Domains"
|
|
43
|
+
add_domain_prefix
|
|
44
|
+
file.puts @domains.to_a
|
|
45
|
+
puts "Writing #{@domains.count} domains".blue if @verbose
|
|
46
|
+
file.puts "# urls"
|
|
47
|
+
file.puts @links.to_a
|
|
48
|
+
puts "Writing #{@links.count} URLS".blue if @verbose
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def summary
|
|
52
|
+
puts message_sumary_links_imported.green
|
|
53
|
+
super(@links, @original_links)
|
|
54
|
+
puts message_sumary_domains_imported.green
|
|
55
|
+
super(@domains, @original_domains)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
:private
|
|
59
|
+
def domain_or_url(line)
|
|
60
|
+
if( /^domain/.match(line))
|
|
61
|
+
return :domain
|
|
62
|
+
elsif( /^http/.match(line) )
|
|
63
|
+
return :url
|
|
64
|
+
else
|
|
65
|
+
raise "Error parsing Disavow file"
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def is_url?(link)
|
|
70
|
+
unless( link.match(/^http(s)?\:/) )
|
|
71
|
+
:domain
|
|
72
|
+
else
|
|
73
|
+
:url
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def remove_domain_prefix(domain)
|
|
78
|
+
domain.gsub(/^domain\:/, '')
|
|
79
|
+
end
|
|
80
|
+
def remove_domain_prefix!(domain)
|
|
81
|
+
domain.gsub!(/^domain\:/, '')
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def add_domain_prefix
|
|
85
|
+
@domains.collect!{|domain| domain="domain:" + domain}
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def total_elements
|
|
89
|
+
@domains.count + @links.count
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
end
|
|
93
|
+
end
|
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
require_relative 'list'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
require 'open-uri'
|
|
4
|
+
require 'timeout'
|
|
5
|
+
|
|
2
6
|
module DisavowTool
|
|
3
7
|
class ImportedLinks < List
|
|
4
8
|
|
|
@@ -17,18 +21,34 @@ module DisavowTool
|
|
|
17
21
|
def analyse(disavowed, white_list)
|
|
18
22
|
"Ready to delete analize #{@list.count} remaining links"
|
|
19
23
|
@list.each do |url|
|
|
20
|
-
puts "#{"*"*100}\n*
|
|
21
|
-
|
|
24
|
+
puts "#{"*"*100}\n*"
|
|
25
|
+
puts "* Analysing url: #{url.on_green}"
|
|
26
|
+
if OPTIONS.network_requests
|
|
27
|
+
print "* "+ "Obtaining website's title...\r".red.blink
|
|
28
|
+
puts "* Website title: #{website_title(url)}".ljust(100)
|
|
29
|
+
end
|
|
30
|
+
puts "* URls with this same domain: #{urls_with_same_domain(url)}"
|
|
31
|
+
puts "*\n#{"*"*100}"
|
|
32
|
+
puts menu()
|
|
22
33
|
input = $stdin.getch
|
|
23
34
|
input = $stdin.getch if open_browser_option(input, url)
|
|
24
35
|
case input
|
|
25
36
|
when "w"
|
|
37
|
+
raise "Command run with no whitelist option" if OPTIONS.whitelist == false
|
|
26
38
|
white_list.add_url url
|
|
27
39
|
self.delete_url url
|
|
40
|
+
when "W"
|
|
41
|
+
raise "Command run with no whitelist option" if OPTIONS.whitelist == false
|
|
42
|
+
domain = white_list.add_domain_from_url(url)
|
|
43
|
+
self.delete_url url
|
|
44
|
+
puts "Attempting to remove URLs with the domain #{domain} from imported links to stop anaylsing"
|
|
45
|
+
self.delete_urls_if_domains(domain)
|
|
46
|
+
when "a"
|
|
47
|
+
white_list.add_urls_with_same_domain_as url, self
|
|
28
48
|
when "d"
|
|
29
49
|
domain = disavowed.add_domain_from_url(url)
|
|
30
50
|
self.delete_url url
|
|
31
|
-
puts "Attempting to remove URLs with the domain #{domain}"
|
|
51
|
+
puts "Attempting to remove URLs with the domain #{domain} from imported links to stop anaylsing"
|
|
32
52
|
self.delete_urls_if_domains(domain)
|
|
33
53
|
when "u"
|
|
34
54
|
disavowed.add_url(url)
|
|
@@ -54,18 +74,48 @@ module DisavowTool
|
|
|
54
74
|
end
|
|
55
75
|
|
|
56
76
|
:private
|
|
57
|
-
def
|
|
58
|
-
|
|
77
|
+
def menu
|
|
78
|
+
message = ""
|
|
79
|
+
message = "[w] Whitelist url [W] Whitelist the entire domain [a] whitelist as url All urls with this domain\n" if OPTIONS.whitelist
|
|
80
|
+
message += "[d] Disavow as domain [u] Disavow as a URL [o] to open the URL."
|
|
59
81
|
end
|
|
60
82
|
|
|
61
83
|
def open_browser_option(input, link)
|
|
62
84
|
if input == "o"
|
|
63
85
|
if Gem.win_platform? then system "start chrome #{link}" else system "open -a safari #{link}" end
|
|
64
86
|
puts "Opening #{link}...".blue
|
|
65
|
-
|
|
87
|
+
puts menu
|
|
66
88
|
return true
|
|
67
89
|
end
|
|
68
90
|
end
|
|
69
91
|
|
|
92
|
+
def website_title(url)
|
|
93
|
+
begin
|
|
94
|
+
Timeout::timeout(SECONDS_TITTLE_REQUEST) do
|
|
95
|
+
page = Nokogiri::HTML(open(URI.escape(url)))
|
|
96
|
+
return "Empty Title" if page.css("title").blank?
|
|
97
|
+
return page.css("title")[0].text
|
|
98
|
+
end
|
|
99
|
+
rescue Timeout::Error => e
|
|
100
|
+
return "Empty Title — Request Time Out: #{e}"
|
|
101
|
+
rescue OpenURI::HTTPError => e
|
|
102
|
+
return "Empty Title. HTTP Error: #{e}"
|
|
103
|
+
rescue SocketError => e
|
|
104
|
+
return "Empty Title. Can't open site: #{e}"
|
|
105
|
+
rescue
|
|
106
|
+
return "Empty Tittle "
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def urls_with_same_domain(url)
|
|
112
|
+
domain = URI.parse(URI.escape(url)).host
|
|
113
|
+
counter = 0
|
|
114
|
+
self.each do |link|
|
|
115
|
+
counter += 1 if URI.parse(URI.escape(link)).host == domain
|
|
116
|
+
end
|
|
117
|
+
counter
|
|
118
|
+
end
|
|
119
|
+
|
|
70
120
|
end
|
|
71
121
|
end
|
data/lib/disavow_tool/list.rb
CHANGED
|
@@ -20,6 +20,10 @@ module DisavowTool
|
|
|
20
20
|
puts "Importing file: #{file}"
|
|
21
21
|
File.readlines(file).each do |line|
|
|
22
22
|
line.chomp!
|
|
23
|
+
if comment?(line) || line.blank?
|
|
24
|
+
puts "cleaning comment or empty line: #{line}" if @verbose_hard
|
|
25
|
+
next
|
|
26
|
+
end
|
|
23
27
|
unmodified_line = line
|
|
24
28
|
clean_line!(line)
|
|
25
29
|
puts import_message(unmodified_line).light_blue if @verbose_hard
|
|
@@ -42,8 +46,7 @@ module DisavowTool
|
|
|
42
46
|
def add_url(url, list=nil)
|
|
43
47
|
list = list || @list
|
|
44
48
|
if list.add? url
|
|
45
|
-
|
|
46
|
-
puts add_url_message(color_url).blue if @verbose
|
|
49
|
+
puts add_url_message(url).blue if @verbose
|
|
47
50
|
else
|
|
48
51
|
puts "Not adding #{url}. Already in the list.".red if @verbose
|
|
49
52
|
end
|
|
@@ -59,7 +62,7 @@ module DisavowTool
|
|
|
59
62
|
end
|
|
60
63
|
|
|
61
64
|
def mass_delete_urls(urls_to_delete)
|
|
62
|
-
puts mass_delete_message if @
|
|
65
|
+
puts mass_delete_message if @verbose_hard
|
|
63
66
|
urls_to_delete.each do |link|
|
|
64
67
|
color_link = link.on_yellow
|
|
65
68
|
puts delete_url_message(color_link).red if @verbose
|
|
@@ -118,6 +121,10 @@ module DisavowTool
|
|
|
118
121
|
end
|
|
119
122
|
end
|
|
120
123
|
|
|
124
|
+
def comment?(line)
|
|
125
|
+
line.match(/^#/) ? true : false
|
|
126
|
+
end
|
|
127
|
+
|
|
121
128
|
def each
|
|
122
129
|
@list.each do |element|
|
|
123
130
|
yield(element)
|