webmole 1.0.0

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: df9c01135d0b83dafa566f883f43f35defcc819873709a8da2a176b149444451
+   data.tar.gz: 11c6c44c605c0d740bc4ebc244b6e34acd56f637f80fbde72b177133df7fee6f
+ SHA512:
+   metadata.gz: 7203dd2fc644fe3cee8a6c4c6c31bcb100596e157ad5ab274d6774d42a36779c263b371e7016c1a532d557f13edc5df73aaa72905fc751f4633868b245be1919
+   data.tar.gz: 60d83b4f4e70d4da6d2f7c086f90b15696a0ad97042e34a97a0528c02f3eac9883ca32d6b4ffa8f334451eefd39eae006675465a0b72e191640af5326b21e2d4
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
+ The MIT License (MIT)
+
+ Copyright (c) 2024 Subnetmasked
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,49 @@
+ # WebMole
+
+ WebMole is a powerful web scraping tool built in Ruby. It allows you to crawl websites and extract various types of information such as emails, phone numbers, URLs, social media handles, addresses, and more.
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+ ```ruby
+ gem 'webmole'
+ ```
+
+ And then execute:
+
+     $ bundle install
+
+ Or install it yourself as:
+
+     $ gem install webmole
+
+ ## Usage
+
+ To use WebMole, run the following command:
+
+ ```
+ webmole -u https://example.com -s emails -d 2
+ ```
+
+ This will crawl https://example.com to a depth of 2 and extract all email addresses found.
+
+ For more options, run:
+
+ ```
+ webmole --help
+ ```
+
+ ## Development
+
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+
+ To install this gem onto your local machine, run `bundle exec rake install`.
+
+ ## Contributing
+
+ Bug reports and pull requests are welcome on GitHub at https://github.com/subnetmasked/webmole.
+
+ ## License
+
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
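Beyond the CLI, a crawl can be driven from Ruby by handing `WebMole.run` the same options hash that `bin/webmole` builds with OptionParser. A minimal sketch; the option keys are taken from `bin/webmole` below, and the output file name is illustrative:

```ruby
require 'webmole'

# Same keys the CLI sets via OptionParser in bin/webmole.
options = {
  url: 'https://example.com',  # starting URL (-u), required
  scrape_option: :emails,      # what to extract (-s)
  depth: 2,                    # how deep to follow links (-d)
  delay: 1.0,                  # politeness delay between requests (--delay)
  format: 'csv',               # output format (-f)
  output: 'emails.csv'         # output file (-o); illustrative name
}

WebMole.run(options)
```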
data/bin/webmole ADDED
@@ -0,0 +1,86 @@
+ #!/usr/bin/env ruby
+
+ require 'optparse'
+ require 'webmole'
+
+ options = {}
+ OptionParser.new do |opts|
+   opts.banner = "Usage: webmole [options]"
+
+   opts.on("-v", "--version", "Show version") do
+     puts "WebMole version #{WebMole::VERSION}"
+     exit
+   end
+
+   opts.on("-d", "--depth DEPTH", Integer, "Crawl depth (default: 3)") do |d|
+     options[:depth] = d
+   end
+
+   opts.on("-u", "--url URL", "Starting URL to crawl") do |url|
+     options[:url] = url
+   end
+
+   opts.on("-h", "--help", "Show this help message") do
+     puts opts
+     exit
+   end
+
+   opts.on("--delay SECONDS", Float, "Delay between requests in seconds (default: 1.0)") do |delay|
+     options[:delay] = delay
+   end
+
+   opts.on("-o", "--output FILE", "Output file") do |file|
+     options[:output] = file
+   end
+
+   opts.on("-f", "--format FORMAT", "Output format (txt, yaml, or csv)") do |format|
+     options[:format] = format.downcase
+   end
+
+   opts.on("-t", "--threads NUM", Integer, "Number of threads to use (default: 1)") do |t|
+     options[:threads] = t
+   end
+
+   opts.on("-s", "--scrape OPTION", "Scrape option (emails, phone_numbers, urls, social_media, addresses, credit_cards, custom)") do |option|
+     options[:scrape_option] = option.to_sym
+   end
+
+   opts.on("-p", "--pattern REGEX", "Custom regex pattern to search for (use with -s custom)") do |pattern|
+     options[:pattern] = pattern
+   end
+
+   opts.on("--verbose", "Enable verbose output") do
+     options[:verbose] = true
+   end
+
+   opts.on("--restrict-domain", "Restrict crawling to the initial domain") do
+     options[:restrict_domain] = true
+   end
+
+   opts.on("--timeout SECONDS", Integer, "Set a timeout for the crawl (default: 300 seconds)") do |t|
+     options[:timeout] = t
+   end
+
+   opts.on("--save-source-url", "Save the source URL for each match") do
+     options[:save_source_url] = true
+   end
+
+   opts.on("-g", "--gathering", "Enable gathering mode") do
+     options[:gathering_mode] = true
+   end
+ end.parse!
+
+ # A starting URL is required; fail early with a clear message instead of
+ # crashing inside the crawler when URI(nil) is parsed.
+ abort "Error: a starting URL is required (use -u/--url)." unless options[:url]
+
+ WebMole.print_banner
+ WebMole.print_disclaimer
+
+ begin
+   puts "\nPress Enter to continue or Ctrl+C to exit.".colorize(:yellow)
+   gets
+   WebMole.run(options)
+ rescue Interrupt
+   puts "\nScript terminated by user.".colorize(:red)
+ end
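The `custom` scrape option pairs `-s custom` with a `-p` regex, which `Scraper` compiles via `Regexp.new`. An illustrative invocation; the URL, pattern, and output file name are examples, not part of the gem:

```
webmole -u https://example.com -s custom -p '\bv\d+\.\d+\.\d+\b' -d 1 -f txt -o versions.txt
```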
data/lib/webmole/crawler.rb ADDED
@@ -0,0 +1,161 @@
+ require_relative 'scraper'
+ require_relative 'output_formatter'
+ require_relative 'user_agent_switcher'
+ require_relative 'gathering_mode'
+ require 'open-uri'
+
+ module WebMole
+   class Crawler
+     def initialize(options)
+       @url = options[:url]
+       @depth = options[:depth] || 3
+       @delay = options[:delay] || 1.0
+       @threads = options[:threads] || 1
+       @verbose = options[:verbose]
+       @restrict_domain = options[:restrict_domain]
+       @initial_domain = URI(@url).host
+       @timeout = options[:timeout] || 300
+       @urls_to_scrape = Set.new
+       @visited = Set.new
+       @mutex = Mutex.new
+       @output = options[:output]
+       @format = options[:format]
+       @save_source_url = options[:save_source_url]
+       @user_agent_switcher = UserAgentSwitcher.new
+       @gathering_mode = options[:gathering_mode]
+       @scraper = Scraper.new(options[:scrape_option], options[:pattern], @gathering_mode)
+       GatheringMode.setup if @gathering_mode
+     end
+
+     def crawl
+       start_time = Time.now
+
+       puts "Phase 1: Discovering URLs to scrape...".colorize(:cyan)
+       discover_urls
+
+       puts "\nPhase 2: Scraping discovered URLs...".colorize(:cyan)
+       process_urls
+
+       end_time = Time.now
+       print_summary(start_time, end_time)
+     end
+
+     private
+
+     def discover_urls
+       queue = Queue.new
+       queue.push([@url, @depth])
+       @visited.add(@url)
+
+       thread_count = [@threads, 1].max # Ensure at least 1 thread
+       discovery_threads = thread_count.times.map do
+         Thread.new do
+           while !queue.empty?
+             url, depth = queue.pop(true) rescue nil
+             break unless url && depth
+             discover_links(url, depth, queue)
+           end
+         end
+       end
+
+       discovery_threads.each(&:join)
+       puts "Discovered #{@urls_to_scrape.size} URLs to scrape.".colorize(:green)
+     end
+
+     def discover_links(url, depth, queue)
+       return if depth < 0
+
+       puts "Discovering: #{url}".colorize(:light_blue) if @verbose
+
+       begin
+         doc = fetch_page(url)
+         find_links(doc, url).each do |link|
+           next if @restrict_domain && URI(link).host != @initial_domain
+           if @visited.add?(link)
+             @urls_to_scrape.add(link)
+             queue.push([link, depth - 1])
+           end
+         end
+       rescue StandardError => e
+         puts "Error discovering links from #{url}: #{e.message}".colorize(:red) if @verbose
+       ensure
+         sleep(@delay)
+       end
+     end
+
+     def find_links(doc, base_url)
+       base_uri = URI(base_url)
+       doc.css('a').map { |link| link['href'] }.compact.map do |href|
+         begin
+           uri = URI(href)
+           if uri.scheme.nil?
+             URI.join(base_uri, href).to_s
+           elsif ['http', 'https'].include?(uri.scheme.downcase)
+             uri.to_s
+           else
+             nil
+           end
+         rescue URI::InvalidURIError, NoMethodError
+           nil
+         end
+       end.compact
+     end
+
+     def process_urls
+       total_urls = @urls_to_scrape.size
+       processed = 0
+       start_time = Time.now
+
+       @urls_to_scrape.each do |url|
+         crawl_url(url)
+         processed += 1
+
+         elapsed_time = Time.now - start_time
+         avg_time_per_url = elapsed_time / processed
+         estimated_time_left = avg_time_per_url * (total_urls - processed)
+
+         puts "Processed: #{processed}/#{total_urls} | " \
+              "Elapsed: #{format_time(elapsed_time)} | " \
+              "Est. Left: #{format_time(estimated_time_left)}".colorize(:cyan)
+       end
+     end
+
+     def crawl_url(url)
+       puts "Crawling: #{url}".colorize(:light_blue)
+
+       begin
+         doc = fetch_page(url)
+         @scraper.scrape(doc.text, url)
+       rescue OpenURI::HTTPError => e
+         puts "HTTP Error crawling #{url}: #{e.message}".colorize(:red)
+       rescue SocketError, URI::InvalidURIError => e
+         puts "Error crawling #{url}: #{e.message}".colorize(:red)
+       rescue StandardError => e
+         puts "Unexpected error crawling #{url}: #{e.message}".colorize(:red)
+       ensure
+         sleep(@delay)
+       end
+     end
+
+     def fetch_page(url)
+       user_agent = @user_agent_switcher.random_user_agent
+       Nokogiri::HTML(URI.open(url, 'User-Agent' => user_agent))
+     end
+
+     def format_time(seconds)
+       minutes, seconds = seconds.divmod(60)
+       hours, minutes = minutes.divmod(60)
+       [hours, minutes, seconds].map { |t| t.to_i.to_s.rjust(2, '0') }.join(':')
+     end
+
+     def print_summary(start_time, end_time)
+       puts "\nCrawling complete!".colorize(:green)
+       puts "Total URLs processed: #{@urls_to_scrape.size}".colorize(:cyan)
+       puts "Total matches found: #{@scraper.matches.size}".colorize(:cyan)
+       puts "Total time: #{format_time(end_time - start_time)}".colorize(:cyan)
+
+       OutputFormatter.new(@format, @output, @save_source_url).format_results(@scraper.matches)
+       GatheringMode.print_summary if @gathering_mode
+     end
+   end
+ end
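`Crawler#find_links` resolves relative hrefs against the page URL and keeps only http(s) links. The same Nokogiri/URI logic can be checked in isolation; a self-contained sketch with made-up HTML:

```ruby
require 'nokogiri'
require 'uri'

html = '<a href="/about">About</a> <a href="mailto:x@y.z">Mail</a> <a href="https://other.example/">Ext</a>'
doc  = Nokogiri::HTML(html)
base = URI('https://example.com/index.html')

links = doc.css('a').map { |a| a['href'] }.compact.map do |href|
  uri = URI(href)
  if uri.scheme.nil?
    URI.join(base, href).to_s                    # relative -> absolute
  elsif %w[http https].include?(uri.scheme.downcase)
    uri.to_s                                     # keep web links as-is
  end                                            # mailto:, javascript:, ... fall through as nil
rescue URI::InvalidURIError
  nil
end.compact

p links  # => ["https://example.com/about", "https://other.example/"]
```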
data/lib/webmole/gathering_mode.rb ADDED
@@ -0,0 +1,45 @@
+ require 'fileutils'
+ require 'set'
+
+ module WebMole
+   class GatheringMode
+     @gathering_dir = File.join(Dir.home, '.webmole_gathered')
+     @gathered_data = Hash.new { |h, k| h[k] = Set.new }
+
+     class << self
+       def setup
+         FileUtils.mkdir_p(@gathering_dir) unless File.directory?(@gathering_dir)
+         load_existing_data
+       end
+
+       def save_to_gathering(match, type)
+         @gathered_data[type].add(match)
+         save_data(type)
+       end
+
+       def print_summary
+         puts "\nGathering mode summary:".colorize(:green)
+         @gathered_data.each do |type, matches|
+           puts "#{type}: #{matches.size} unique entries".colorize(:cyan)
+         end
+       end
+
+       private
+
+       def load_existing_data
+         Dir.glob(File.join(@gathering_dir, 'gathered_*.txt')).each do |file|
+           type = File.basename(file, '.txt').sub('gathered_', '')
+           File.readlines(file, chomp: true).each do |line|
+             @gathered_data[type].add(line)
+           end
+         end
+       end
+
+       def save_data(type)
+         File.open(File.join(@gathering_dir, "gathered_#{type}.txt"), 'w') do |f|
+           @gathered_data[type].each { |match| f.puts match }
+         end
+       end
+     end
+   end
+ end
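Gathering mode accumulates matches across runs, one deduplicated text file per match type under `~/.webmole_gathered`. A quick sketch of the round trip; note it writes real files in your home directory:

```ruby
require 'webmole'

WebMole::GatheringMode.setup  # creates ~/.webmole_gathered and reloads prior runs
WebMole::GatheringMode.save_to_gathering('alice@example.com', 'emails')
# rewrites ~/.webmole_gathered/gathered_emails.txt with the deduplicated set

WebMole::GatheringMode.print_summary
# Gathering mode summary:
# emails: 1 unique entries   (count includes anything gathered on earlier runs)
```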
data/lib/webmole/output_formatter.rb ADDED
@@ -0,0 +1,85 @@
+ require 'yaml'
+ require 'csv'
+
+ module WebMole
+   class OutputFormatter
+     def initialize(format = nil, output = nil, save_source_url = false)
+       @format = format
+       @output = output
+       @save_source_url = save_source_url
+     end
+
+     def format_results(matches)
+       if @output
+         save_results(matches)
+       else
+         print_results(matches)
+       end
+     end
+
+     private
+
+     def print_results(matches)
+       puts "Found matches:".colorize(:green)
+       matches.each do |match, urls|
+         if @save_source_url
+           urls.each { |url| puts "#{match} - #{url}".colorize(:cyan) }
+         else
+           puts match.colorize(:cyan)
+         end
+       end
+       puts "Total matches found: #{matches.size}".colorize(:yellow)
+     end
+
+     def save_results(matches)
+       case @format
+       when 'txt'
+         save_as_txt(matches)
+       when 'yaml'
+         save_as_yaml(matches)
+       when 'csv'
+         save_as_csv(matches)
+       else
+         puts "Unsupported format: #{@format}".colorize(:red)
+       end
+       puts "Total matches found: #{matches.size}".colorize(:yellow)
+     end
+
+     def save_as_txt(matches)
+       File.open(@output, 'w') do |file|
+         file.puts "Found matches:"
+         matches.each do |match, urls|
+           if @save_source_url
+             urls.each { |url| file.puts "#{match} - #{url}" }
+           else
+             file.puts match
+           end
+         end
+       end
+       puts "Results saved to #{@output}".colorize(:green)
+     end
+
+     def save_as_yaml(matches)
+       File.open(@output, 'w') do |file|
+         # Convert Set values to plain arrays so the YAML stays portable.
+         data = @save_source_url ? matches.transform_values(&:to_a) : matches.keys
+         file.write({ matches: data }.to_yaml)
+       end
+       puts "Results saved to #{@output}".colorize(:green)
+     end
+
+     def save_as_csv(matches)
+       CSV.open(@output, 'w') do |csv|
+         csv << (@save_source_url ? ['Match', 'Source URL'] : ['Match'])
+         matches.each do |match, urls|
+           if @save_source_url
+             urls.each { |url| csv << [match, url] }
+           else
+             csv << [match]
+           end
+         end
+       end
+       puts "Results saved to #{@output}".colorize(:green)
+     end
+   end
+ end
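`format_results` consumes the `Scraper#matches` shape, a hash from match string to the `Set` of source URLs it was seen on. A minimal CSV sketch; the file name is illustrative:

```ruby
require 'webmole'
require 'set'

matches = {
  'alice@example.com' => Set.new(['https://example.com/contact']),
  'bob@example.com'   => Set.new(['https://example.com/', 'https://example.com/team'])
}

# Arguments: format, output path, save_source_url
WebMole::OutputFormatter.new('csv', 'matches.csv', true).format_results(matches)
# matches.csv then contains:
#   Match,Source URL
#   alice@example.com,https://example.com/contact
#   bob@example.com,https://example.com/
#   bob@example.com,https://example.com/team
```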
data/lib/webmole/scraper.rb ADDED
@@ -0,0 +1,79 @@
+ require_relative 'gathering_mode'
+
+ module WebMole
+   class Scraper
+     attr_reader :matches
+
+     def initialize(scrape_option, pattern, gathering_mode = false)
+       @scrape_option = scrape_option
+       @pattern = pattern ? Regexp.new(pattern) : nil
+       @matches = Hash.new { |h, k| h[k] = Set.new }
+       @gathering_mode = gathering_mode
+     end
+
+     def scrape(text, url)
+       case @scrape_option
+       when :emails
+         find_emails(text, url)
+       when :phone_numbers
+         find_phone_numbers(text, url)
+       when :urls
+         find_urls(text, url)
+       when :social_media
+         find_social_media(text, url)
+       when :addresses
+         find_addresses(text, url)
+       when :credit_cards
+         find_credit_cards(text, url)
+       when :custom
+         find_matches(text, url)
+       end
+     end
+
+     private
+
+     def find_emails(text, url)
+       matches = text.scan(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/)
+       add_matches(matches, url, 'emails')
+     end
+
+     def find_phone_numbers(text, url)
+       matches = text.scan(/\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/)
+       add_matches(matches, url, 'phone_numbers')
+     end
+
+     def find_urls(text, url)
+       matches = text.scan(/https?:\/\/[\S]+/)
+       add_matches(matches, url, 'urls')
+     end
+
+     def find_social_media(text, url)
+       matches = text.scan(/@[\w]+/)
+       add_matches(matches, url, 'social_media')
+     end
+
+     def find_addresses(text, url)
+       # Non-capturing groups so scan returns the whole address, not just the last captures.
+       matches = text.scan(/\d+\s+(?:[^\d\n]+\s)+(?:St|Ave|Rd|Blvd|Dr|Lane|Way)\.?/i)
+       add_matches(matches, url, 'addresses')
+     end
+
+     def find_credit_cards(text, url)
+       matches = text.scan(/\b(?:\d{4}[-\s]?){3}\d{4}\b/)
+       add_matches(matches, url, 'credit_cards')
+     end
+
+     def find_matches(text, url)
+       # With capture groups, scan yields arrays; join them back into strings.
+       matches = text.scan(@pattern).map { |m| m.is_a?(Array) ? m.join : m }
+       add_matches(matches, url, 'custom')
+     end
+
+     def add_matches(matches, url, type)
+       matches.each do |match|
+         @matches[match].add(url)
+         GatheringMode.save_to_gathering(match, type) if @gathering_mode
+       end
+     end
+   end
+ end
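Because `Scraper#scrape` takes plain text, the extractors can be exercised without any HTTP traffic; a minimal sketch:

```ruby
require 'webmole'

scraper = WebMole::Scraper.new(:emails, nil)
scraper.scrape('Reach us at sales@example.com or support@example.com.', 'https://example.com')

scraper.matches.each { |email, urls| puts "#{email} seen on #{urls.to_a.join(', ')}" }
# sales@example.com seen on https://example.com
# support@example.com seen on https://example.com
```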
data/lib/webmole/user_agent_switcher.rb ADDED
@@ -0,0 +1,41 @@
+ require 'yaml'
+ require 'fileutils'
+
+ module WebMole
+   class UserAgentSwitcher
+     CONFIG_DIR = File.join(Dir.home, '.config', 'webmole')
+     CONFIG_FILE = File.join(CONFIG_DIR, 'user_agents.yml')
+
+     def initialize
+       ensure_config_file_exists
+       @user_agents = YAML.load_file(CONFIG_FILE)
+     rescue StandardError => e
+       puts "Warning: Error loading user_agents.yml: #{e.message}. Using default user agent.".colorize(:yellow)
+       @user_agents = ['Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36']
+     end
+
+     def random_user_agent
+       @user_agents.sample
+     end
+
+     private
+
+     def ensure_config_file_exists
+       return if File.exist?(CONFIG_FILE)
+
+       FileUtils.mkdir_p(CONFIG_DIR)
+       File.write(CONFIG_FILE, default_user_agents.to_yaml)
+       puts "Created default user_agents.yml in #{CONFIG_FILE}".colorize(:green)
+     end
+
+     def default_user_agents
+       [
+         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
+         'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
+         'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
+         'Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1'
+       ]
+     end
+   end
+ end
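The user-agent pool lives in `~/.config/webmole/user_agents.yml` as a plain YAML sequence (exactly what `default_user_agents.to_yaml` emits), so extra entries can be added by hand. Abridged example of the generated file:

```yaml
---
- Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36
- Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0
```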
data/lib/webmole/version.rb ADDED
@@ -0,0 +1,3 @@
+ module WebMole
+   VERSION = '1.0.0'
+ end
data/lib/webmole.rb ADDED
@@ -0,0 +1,51 @@
+ require 'nokogiri'
+ require 'open-uri'
+ require 'uri'
+ require 'set'
+ require 'yaml'
+ require 'csv'
+ require 'colorize'
+
+ require_relative 'webmole/version'
+ require_relative 'webmole/crawler'
+ require_relative 'webmole/scraper'
+ require_relative 'webmole/output_formatter'
+ require_relative 'webmole/user_agent_switcher'
+ require_relative 'webmole/gathering_mode'
+
+ module WebMole
+   class Error < StandardError; end
+
+   def self.run(options)
+     crawler = Crawler.new(options)
+     crawler.crawl
+   end
+
+   def self.print_banner
+     puts <<-'EOB'.colorize(:light_blue)
+ __        __   _     __  __       _
+ \ \      / /__| |__ |  \/  | ___ | | ___
+  \ \ /\ / / _ \ '_ \| |\/| |/ _ \| |/ _ \
+   \ V  V /  __/ |_) | |  | | (_) | |  __/
+    \_/\_/ \___|_.__/|_|  |_|\___/|_|\___|
+
+     EOB
+     puts "WebMole v#{VERSION}".colorize(:light_cyan)
+     puts "A relatively powerful web scraper.".colorize(:light_green)
+     puts
+   end
+
+   def self.print_disclaimer
+     puts "DISCLAIMER:".colorize(:yellow)
+     puts <<-EOD.colorize(:light_yellow)
+       This tool is for educational and ethical use only. The user bears all responsibility
+       for ensuring compliance with applicable laws, regulations, and website terms of service.
+       Misuse of this tool may be illegal and/or unethical. Always obtain proper authorization
+       before scraping any website.
+
+       Contact: Subnetmasked <subnetmasked@cock.li>
+
+       By using this tool, you agree to these terms and conditions.
+     EOD
+   end
+ end
data/webmole.gemspec ADDED
@@ -0,0 +1,30 @@
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'webmole/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "webmole"
+   spec.version       = WebMole::VERSION
+   spec.authors       = ["Subnetmasked"]
+   spec.email         = ["subnetmasked@cock.li"]
+   spec.summary       = %q{A powerful web scraper}
+   spec.description   = %q{WebMole is a Ruby-based web scraper with multiple features including email extraction, phone number scraping, and more.}
+   spec.homepage      = "https://github.com/subnetmasked/webmole"
+   spec.license       = "MIT"
+   spec.required_ruby_version = '>= 3.0'
+
+   spec.files         = Dir['lib/**/*', 'bin/*', 'LICENSE.txt', '*.md', 'webmole.gemspec']
+   spec.bindir        = "bin"
+   spec.executables   = ["webmole"]
+   spec.require_paths = ["lib"]
+
+   spec.add_dependency "nokogiri", "~> 1.11"
+   spec.add_dependency "colorize", "~> 0.8"
+   spec.add_dependency "optparse", "~> 0.5.0"
+   spec.add_dependency "uri", "~> 0.13.1"
+   spec.add_dependency "csv", "~> 3.2"
+
+   spec.add_development_dependency "bundler", "~> 2.0"
+   spec.add_development_dependency "rake", "~> 13.0"
+   spec.add_development_dependency "rspec", "~> 3.10"
+ end
metadata ADDED
@@ -0,0 +1,168 @@
+ --- !ruby/object:Gem::Specification
+ name: webmole
+ version: !ruby/object:Gem::Version
+   version: 1.0.0
+ platform: ruby
+ authors:
+ - Subnetmasked
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2024-10-16 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: nokogiri
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.11'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.11'
+ - !ruby/object:Gem::Dependency
+   name: colorize
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0.8'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0.8'
+ - !ruby/object:Gem::Dependency
+   name: optparse
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.5.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.5.0
+ - !ruby/object:Gem::Dependency
+   name: uri
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.13.1
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.13.1
+ - !ruby/object:Gem::Dependency
+   name: csv
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '3.2'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '3.2'
+ - !ruby/object:Gem::Dependency
+   name: bundler
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.0'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '13.0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '13.0'
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '3.10'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '3.10'
+ description: WebMole is a Ruby-based web scraper with multiple features including
+   email extraction, phone number scraping, and more.
+ email:
+ - subnetmasked@cock.li
+ executables:
+ - webmole
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - LICENSE.txt
+ - README.md
+ - bin/webmole
+ - lib/webmole.rb
+ - lib/webmole/crawler.rb
+ - lib/webmole/gathering_mode.rb
+ - lib/webmole/output_formatter.rb
+ - lib/webmole/scraper.rb
+ - lib/webmole/user_agent_switcher.rb
+ - lib/webmole/version.rb
+ - webmole.gemspec
+ homepage: https://github.com/subnetmasked/webmole
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '3.0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubygems_version: 3.5.21
+ signing_key:
+ specification_version: 4
+ summary: A powerful web scraper
+ test_files: []