disavow_tool 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c7ec191f824e770810f284c9893491f27468279a136bbfc075f8319e51f84c7a
4
- data.tar.gz: 63626b8f6505c63e13930da9fb16b25d543256230ad149475d8edbc229a4b459
3
+ metadata.gz: 8db2e508792ce319b2f11c7e4dd43a3e7d31b875a0c3d80aa97828b600e34e96
4
+ data.tar.gz: 868088592ece6710d0326ce9b586c79e072f083cb9bdc2060998c1649552bc87
5
5
  SHA512:
6
- metadata.gz: 0e77f7322716892366f2c22c1af5c7d6a31a8d50e088322bca3b1620aedab22b66440eee819590baac8cdd2a32e5100735f96c7565ec7203b4751cad8622878b
7
- data.tar.gz: 751b98ce7205bb45e3eb472c1e76d8262aedca76e2d18774faacd0985a8385ceaad7e3e447f7e8c558927a05cc0fc29dfdc7a1c22ffced73244589faf7027cfa
6
+ metadata.gz: a7e6e2ab72198469ff1ec3cfa68497fbefcfec8eba7746ad1c2e17a087a045f2a8c1539de12642c04841256e83958095a20d0c0bc06b625940baf52c796b0546
7
+ data.tar.gz: a4da05637f583bf94cd3b4eb83c234297adf18954339757529e8ace255a008bf6674ed2fa2c80184dcf1002fc624b31695290b7161cfa304e18b58ce74901776
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- disavow_tool (0.1.1)
4
+ disavow_tool (0.3.0)
5
5
  activesupport
6
6
  colorize
7
+ nokogiri
7
8
 
8
9
  GEM
9
10
  remote: https://rubygems.org/
@@ -18,7 +19,10 @@ GEM
18
19
  diff-lcs (1.3)
19
20
  i18n (1.6.0)
20
21
  concurrent-ruby (~> 1.0)
22
+ mini_portile2 (2.4.0)
21
23
  minitest (5.11.3)
24
+ nokogiri (1.10.3)
25
+ mini_portile2 (~> 2.4.0)
22
26
  rake (10.5.0)
23
27
  rspec (3.8.0)
24
28
  rspec-core (~> 3.8.0)
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby

# Command-line entry point: load the library that sits next to this
# executable and hand control to DisavowTool.run.
require_relative "../lib/disavow_tool"

DisavowTool.run
@@ -36,4 +36,5 @@ Gem::Specification.new do |spec|
36
36
  # Dependencies
37
37
  spec.add_dependency "colorize"
38
38
  spec.add_dependency "activesupport"
39
+ spec.add_dependency "nokogiri"
39
40
  end
@@ -11,8 +11,8 @@ require_relative 'disavow_tool/white_list.rb'
11
11
  require_relative 'disavow_tool/imported_links.rb'
12
12
 
13
13
  module DisavowTool
14
- def self.run
15
14
 
15
+ def self.run
16
16
  puts "Importing new links".blue if OPTIONS.verbose
17
17
  imported_links = ImportedLinks.new
18
18
 
@@ -23,7 +23,10 @@ module DisavowTool
23
23
  puts "Importing Whitelist links".blue if OPTIONS.verbose
24
24
  white_list = WhiteList.new
25
25
  puts "Cleagning imported links already in whitelist".blue if OPTIONS.verbose
26
- imported_links.remove_known_links(white_list)
26
+ imported_links.remove_known_links(white_list.links)
27
+ puts "Cleagning imported links with a domain existingin in whitelist".blue if OPTIONS.verbose
28
+ p white_list.domains
29
+ imported_links.remove_known_links_for_domain(white_list.domains)
27
30
  end
28
31
 
29
32
 
@@ -13,13 +13,14 @@ module DisavowTool
13
13
  options.whitelist = false
14
14
  options.verbose = false
15
15
  options.hardcore_verbose = false
16
+ options.network_requests = true
16
17
 
17
18
  opt_parser = OptionParser.new do |opts|
18
19
  opts.banner = "Usage: disavow.rb [options] --disavow FILE --import file_1,file_2,file_3 [--whitelist file1,file2,file3]"
19
20
  opts.separator ""
20
21
  opts.separator "Requited options:"
21
- opts.on("-d","--disavow FILE", "Disavow file as exported from Google Search Console") do |file|
22
- options.disavow_file = file
22
+ opts.on("-d","--disavow file_1,file_2", Array, "Disavow files as exported from Google Search Console") do |file|
23
+ options.disavow_files = file
23
24
  end
24
25
  opts.on("-i","--import file_1,file_2", Array, "List of URLS to analyse. The file must have one URL per line") do |file|
25
26
  options.import_files = file
@@ -36,6 +37,11 @@ module DisavowTool
36
37
  opts.on("-v", "--verbose", "Vervose mode") do
37
38
  options.verbose = true
38
39
  end
40
+
41
+ opts.on("-t", "--no-titles", "Don't request tittles from websites thus making the command faster") do
42
+ options.network_requests = false
43
+ end
44
+
39
45
  opts.on("-V", "--hardcore-vervose", "Print out even your mama") do
40
46
  options.hardcore_verbose = true
41
47
  options.verbose = true # Hardcose verbose includes regular verbose
@@ -60,7 +66,7 @@ module DisavowTool
60
66
  end
61
67
 
62
68
  def check_arguments(options)
63
- raise "You must to specify one disallow file" if options.disavow_file.blank?
69
+ raise "You must to specify one disallow file" if options.disavow_files.blank?
64
70
  raise "You must to specify one import file" if options.import_files.blank?
65
71
  if options.whitelist
66
72
  raise "You need to specify at least one white list file" if options.whitelist_files.blank?
@@ -1,4 +1,5 @@
1
1
  module DisavowTool
2
2
  EXPORT_PATH = "exports/"
3
3
  IMPORT_PATH = "test/"
4
+ SECONDS_TITTLE_REQUEST = 5
4
5
  end
@@ -1,101 +1,33 @@
1
1
  require_relative 'list'
2
+ require_relative 'domain_and_url'
2
3
  require 'uri'
3
4
 
4
5
  module DisavowTool
5
6
  class DisavowList < List
6
- attr_accessor :disavowed_links, :disavowed_domains
7
- alias_method :links, :disavowed_links
8
- alias_method :domains, :disavowed_domains
7
+ include DomainAndUrl
9
8
 
10
- def initialize(import_file=nil)
11
- import_file = import_file || OPTIONS.disavow_file
12
- @disavowed_domains = Set.new
13
- @disavowed_links = Set.new
14
- super(import_file)
15
- end
16
-
17
- def clean_line!(line)
18
- case domain_or_url(line)
19
- when :domain
20
- @disavowed_domains << remove_domain_prefix(line)
21
- when :url
22
- @disavowed_links << line
23
- end
24
- end
25
-
26
- def finished_import_hook
27
- @original_disavowed_domains = @disavowed_domains.clone
28
- @original_disavowed_links = @disavowed_links.clone
29
- end
30
-
31
- def add_domain(domain)
32
- super(domain, @disavowed_domains)
33
- end
34
-
35
- def add_url(url, list=nil)
36
- super(url, @disavowed_links)
37
- end
38
-
39
- def add_domain_from_url(url)
40
- domain = URI.parse(URI.escape(url)).host
41
- add_domain(domain)
42
- p domain
43
- return domain
9
+ def initialize(import_files=nil)
10
+ import_files = import_files || OPTIONS.disavow_files
11
+ super(import_files)
44
12
  end
45
13
 
46
14
  def import_message(domain)
47
15
  "Importing #{is_url?(domain).to_s} #{remove_domain_prefix(domain)} into Disavow list"
48
16
  end
49
17
  def add_url_message(url)
50
- "+++ Inserting #{is_url?(url).to_s} #{url} in Disavow"
18
+ "+++ Inserting #{is_url?(url).to_s} #{url.on_yellow} in Disavow"
51
19
  end
52
20
  def message_sumary_imported; "Disavowed elements imported" end
53
21
  def mensaje_sumary_before_export; "Disavow elements before exporting" end
54
22
 
55
- def summary
56
- puts "Disavowed URLs:".light_blue
57
- super(@disavowed_links, @original_disavowed_links)
58
- puts "Disavowed Domains:".light_blue
59
- super(@disavowed_domains, @original_disavowed_domains)
60
- end
23
+ def message_sumary_links_imported; "Disavowed URLs:" end
24
+ def message_sumary_domains_imported; "Disavowed Domains:" end
61
25
 
62
26
  def export_write(file)
63
- file.puts "# Domains"
64
- file.puts @disavowed_domains.to_a
65
- puts "Writing #{@disavowed_domains.count} Disavowed domains".blue if @verbose
66
- file.puts "# urls"
67
- file.puts @disavowed_links.to_a
68
- puts "Writing #{@disavowed_links.count} Disavowed URLS".blue if @verbose
69
-
70
- end
71
-
72
- :private
73
- def domain_or_url(line)
74
- if( /^domain/.match(line))
75
- return :domain
76
- elsif( /^http/.match(line) )
77
- return :url
78
- else
79
- raise "Error parsing Disavow file"
80
- end
81
- end
82
-
83
- def is_url?(link)
84
- if( link.match(/^http(s)?\:/) )
85
- :url
86
- else
87
- :domain
88
- end
27
+ file.puts "# Disavow"
28
+ super(file)
29
+ puts "Writing #{total_elements} elements into the Disavow file".blue if @verbose
89
30
  end
90
31
 
91
- def remove_domain_prefix(domain)
92
- domain.gsub(/^domain\:/, '')
93
- end
94
- def remove_domain_prefix!(domain)
95
- domain.gsub!(/^domain\:/, '')
96
- end
97
-
98
- def add_domain_prefix
99
- end
100
32
  end
101
33
  end
@@ -0,0 +1,93 @@
1
module DisavowTool
  # Mixin for list classes that keep two collections side by side: bare
  # domains (stored WITHOUT the "domain:" prefix) and full URLs.
  #
  # Several methods call +super+, so the including class must have an
  # ancestor that implements +initialize(import_files)+,
  # +add_domain(domain, list)+, +add_url(url, list)+ and
  # +summary(current, original)+ (presumably List -- confirm against list.rb).
  # The including class must also provide +message_sumary_links_imported+ and
  # +message_sumary_domains_imported+, which +summary+ prints.
  module DomainAndUrl
    # Prefix that marks a domain entry in an import/export file.
    DOMAIN_PREFIX = "domain:"

    attr_accessor :links, :domains

    # Creates the empty domain/link sets *before* delegating to the parent
    # initializer, because the parent receives the files to import and the
    # sets must already exist when clean_line! is invoked.
    #
    # @param import_files forwarded untouched to the parent initializer
    def initialize(import_files)
      @domains = Set.new
      @links = Set.new
      super(import_files)
    end

    # Stores one imported line in the matching set.
    #
    # @param line [String] a "domain:..." entry or an "http..." URL
    # @raise [RuntimeError] when the line is neither (see domain_or_url)
    def clean_line!(line)
      case domain_or_url(line)
      when :domain
        @domains << remove_domain_prefix(line)
      when :url
        @links << line
      end
    end

    # Snapshots both sets so that summary can compare the current content
    # with what was originally imported.
    def finished_import_hook
      @original_domains = @domains.clone
      @original_links = @links.clone
    end

    # Adds +domain+ to the domain set through the parent implementation.
    def add_domain(domain)
      super(domain, @domains)
    end

    # Adds +url+ to the link set through the parent implementation.
    # The +list+ argument is accepted for signature compatibility and ignored.
    def add_url(url, list=nil)
      super(url, @links)
    end

    # Extracts the host of +url+, adds it as a domain and returns it.
    #
    # @param url [String] a URL, possibly containing characters that need
    #   percent-escaping (spaces, non-ASCII)
    # @return [String, nil] the host part of the URL
    # @raise [URI::InvalidURIError] if the escaped URL still cannot be parsed
    def add_domain_from_url(url)
      # URI.escape was removed in Ruby 3.0; the RFC 2396 parser provides the
      # same escaping on old and new Rubies alike.
      domain = URI.parse(URI::RFC2396_Parser.new.escape(url)).host
      add_domain(domain)
      domain
    end

    # Writes the domains (with the "domain:" prefix) followed by the URLs.
    # The in-memory sets are left untouched, so exporting twice or calling
    # summary afterwards behaves the same as before the export.
    #
    # @param file [#puts] destination stream
    def export_write(file)
      file.puts "# Domains"
      file.puts add_domain_prefix.to_a
      puts "Writing #{@domains.count} domains".blue if @verbose
      file.puts "# urls"
      file.puts @links.to_a
      puts "Writing #{@links.count} URLS".blue if @verbose
    end

    # Prints the link and domain summaries using the parent implementation.
    def summary
      puts message_sumary_links_imported.green
      super(@links, @original_links)
      puts message_sumary_domains_imported.green
      super(@domains, @original_domains)
    end

    # NOTE: the helpers below were preceded by a bare `:private` symbol, which
    # is a no-op expression, so they have always been public. They are kept
    # public here so that existing callers keep working.

    # Classifies an imported line.
    #
    # @return [Symbol] :domain or :url
    # @raise [RuntimeError] when the line starts with neither "domain" nor "http"
    def domain_or_url(line)
      return :domain if line.start_with?("domain")
      return :url if line.start_with?("http")

      raise "Error parsing Disavow file: unrecognised line #{line.inspect}"
    end

    # Despite the predicate-style name this returns a Symbol, not a boolean.
    #
    # @return [Symbol] :url for "http:"/"https:" links, :domain otherwise
    def is_url?(link)
      link.start_with?("http:", "https:") ? :url : :domain
    end

    # Returns a copy of +domain+ without a leading "domain:".
    def remove_domain_prefix(domain)
      domain.sub(/\Adomain:/, '')
    end

    # Strips a leading "domain:" in place; returns nil when nothing changed.
    def remove_domain_prefix!(domain)
      domain.sub!(/\Adomain:/, '')
    end

    # Builds the export form of the domains without modifying @domains.
    #
    # @return [Set<String>] every domain carrying exactly one "domain:" prefix
    def add_domain_prefix
      Set.new(@domains.map { |domain| domain.start_with?(DOMAIN_PREFIX) ? domain : DOMAIN_PREFIX + domain })
    end

    # @return [Integer] number of domains plus number of links
    def total_elements
      @domains.count + @links.count
    end

  end
end
@@ -1,4 +1,8 @@
1
1
  require_relative 'list'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+ require 'timeout'
5
+
2
6
  module DisavowTool
3
7
  class ImportedLinks < List
4
8
 
@@ -17,18 +21,34 @@ module DisavowTool
17
21
  def analyse(disavowed, white_list)
18
22
  "Ready to delete analize #{@list.count} remaining links"
19
23
  @list.each do |url|
20
- puts "#{"*"*100}\n* Analysing url: #{url.on_green}\n#{"*"*100}"
21
- display_menu()
24
+ puts "#{"*"*100}\n*"
25
+ puts "* Analysing url: #{url.on_green}"
26
+ if OPTIONS.network_requests
27
+ print "* "+ "Obtaining website's title...\r".red.blink
28
+ puts "* Website title: #{website_title(url)}".ljust(100)
29
+ end
30
+ puts "* URls with this same domain: #{urls_with_same_domain(url)}"
31
+ puts "*\n#{"*"*100}"
32
+ puts menu()
22
33
  input = $stdin.getch
23
34
  input = $stdin.getch if open_browser_option(input, url)
24
35
  case input
25
36
  when "w"
37
+ raise "Command run with no whitelist option" if OPTIONS.whitelist == false
26
38
  white_list.add_url url
27
39
  self.delete_url url
40
+ when "W"
41
+ raise "Command run with no whitelist option" if OPTIONS.whitelist == false
42
+ domain = white_list.add_domain_from_url(url)
43
+ self.delete_url url
44
+ puts "Attempting to remove URLs with the domain #{domain} from imported links to stop anaylsing"
45
+ self.delete_urls_if_domains(domain)
46
+ when "a"
47
+ white_list.add_urls_with_same_domain_as url, self
28
48
  when "d"
29
49
  domain = disavowed.add_domain_from_url(url)
30
50
  self.delete_url url
31
- puts "Attempting to remove URLs with the domain #{domain}"
51
+ puts "Attempting to remove URLs with the domain #{domain} from imported links to stop anaylsing"
32
52
  self.delete_urls_if_domains(domain)
33
53
  when "u"
34
54
  disavowed.add_url(url)
@@ -54,18 +74,48 @@ module DisavowTool
54
74
  end
55
75
 
56
76
  :private
57
- def display_menu
58
- puts "[w] to send to whitelist [d] to send to Disavow as a domain [u] to send to Disavow as a URL [o] to open the URL."
77
# Builds the key legend shown for every analysed URL. The whitelist keys are
# only listed when OPTIONS.whitelist is set; the disavow/open keys are always
# present.
#
# @return [String] one or two lines of menu text
def menu
  disavow_choices = "[d] Disavow as domain [u] Disavow as a URL [o] to open the URL."
  return disavow_choices unless OPTIONS.whitelist

  "[w] Whitelist url [W] Whitelist the entire domain [a] whitelist as url All urls with this domain\n" + disavow_choices
end
60
82
 
61
83
  def open_browser_option(input, link)
62
84
  if input == "o"
63
85
  if Gem.win_platform? then system "start chrome #{link}" else system "open -a safari #{link}" end
64
86
  puts "Opening #{link}...".blue
65
- display_menu
87
+ puts menu
66
88
  return true
67
89
  end
68
90
  end
69
91
 
92
# Downloads +url+ and returns the text of its <title> element so the user can
# judge the link without opening a browser. Never raises for ordinary
# failures: each one is reported as an "Empty Title..." string.
#
# The whole request is capped at SECONDS_TITTLE_REQUEST seconds.
#
# @param url [String] link taken from the imported files
# @return [String] the page title, or an "Empty Title..." explanation
def website_title(url)
  # URI.escape was removed in Ruby 3.0; the RFC 2396 parser offers the same
  # escaping on every supported Ruby.
  escaped = URI::RFC2396_Parser.new.escape(url)

  Timeout.timeout(SECONDS_TITTLE_REQUEST) do
    # URI#open (open-uri) only exists on http/https/ftp URIs. Kernel#open on
    # a string read from a file could open a local path or spawn a process
    # ("|cmd"), and it stopped opening URLs in Ruby 3.0. The block form also
    # closes the response stream once parsed.
    page = URI.parse(escaped).open { |io| Nokogiri::HTML(io) }
    title = page.at_css("title")
    title ? title.text : "Empty Title"
  end
rescue Timeout::Error => e
  "Empty Title — Request Time Out: #{e}"
rescue OpenURI::HTTPError => e
  "Empty Title. HTTP Error: #{e}"
rescue SocketError => e
  "Empty Title. Can't open site: #{e}"
rescue StandardError
  # Anything else (unparsable URL, non-HTTP scheme, TLS failure, ...) is
  # treated as "no title available" so the interactive review keeps going.
  "Empty Title "
end
110
+
111
# Counts how many links yielded by +each+ share the host of +url+.
# If +url+ itself is among the yielded links it is included in the count.
#
# @param url [String] the link currently being analysed
# @return [Integer] number of yielded links whose host equals the host of +url+
# @raise [URI::InvalidURIError] if +url+ or a yielded link cannot be parsed
#   even after escaping
def urls_with_same_domain(url)
  # URI.escape was removed in Ruby 3.0; one RFC 2396 parser instance is
  # reused for every link and performs the same escaping.
  parser = URI::RFC2396_Parser.new
  host_of = ->(link) { URI.parse(parser.escape(link)).host }

  domain = host_of.call(url)
  counter = 0
  each { |link| counter += 1 if host_of.call(link) == domain }
  counter
end
119
+
70
120
  end
71
121
  end
@@ -20,6 +20,10 @@ module DisavowTool
20
20
  puts "Importing file: #{file}"
21
21
  File.readlines(file).each do |line|
22
22
  line.chomp!
23
+ if comment?(line) || line.blank?
24
+ puts "cleaning comment or empty line: #{line}" if @verbose_hard
25
+ next
26
+ end
23
27
  unmodified_line = line
24
28
  clean_line!(line)
25
29
  puts import_message(unmodified_line).light_blue if @verbose_hard
@@ -42,8 +46,7 @@ module DisavowTool
42
46
  def add_url(url, list=nil)
43
47
  list = list || @list
44
48
  if list.add? url
45
- color_url = url.on_yellow
46
- puts add_url_message(color_url).blue if @verbose
49
+ puts add_url_message(url).blue if @verbose
47
50
  else
48
51
  puts "Not adding #{url}. Already in the list.".red if @verbose
49
52
  end
@@ -59,7 +62,7 @@ module DisavowTool
59
62
  end
60
63
 
61
64
  def mass_delete_urls(urls_to_delete)
62
- puts mass_delete_message if @verbose
65
+ puts mass_delete_message if @verbose_hard
63
66
  urls_to_delete.each do |link|
64
67
  color_link = link.on_yellow
65
68
  puts delete_url_message(color_link).red if @verbose
@@ -118,6 +121,10 @@ module DisavowTool
118
121
  end
119
122
  end
120
123
 
124
# Tells whether an imported line is a comment, i.e. its first non-blank
# character is "#". Leading spaces/tabs are tolerated so an indented comment
# is skipped instead of being handed to the URL/domain parser.
#
# @param line [String] one line of an import file
# @return [Boolean] true for comment lines, false otherwise
def comment?(line)
  line.lstrip.start_with?("#")
end
127
+
121
128
  def each
122
129
  @list.each do |element|
123
130
  yield(element)