janis 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -2
- data/Rakefile +2 -2
- data/janis.gemspec +1 -0
- data/lib/janis.rb +48 -18
- data/lib/janis/parser_factory.rb +17 -8
- data/lib/janis/parsing.rb +2 -2
- data/lib/janis/proxy_website_parser.rb +12 -0
- data/lib/janis/specific_parsers/proxy-list_org.rb +1 -1
- data/lib/janis/testing.rb +21 -0
- data/lib/janis/version.rb +1 -1
- metadata +33 -21
- data/lib/janis/specific_parsers/simple.rb +0 -23
- data/proxy_server_list.yml +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d5f0f9b8e3e10fbb572bb3d39194c58edbeaea5b
|
4
|
+
data.tar.gz: eed32f6e8a3d1c1542286a024c26f9bd17a2e886
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c32861860e3bccbaf2027dc633eb56fc5ea3b6923a56f87792ab6df020e3a8b685ddc2d755e13aebfc6a8bf911130ccf48303cedadb3ec89657833e6aa00c167
|
7
|
+
data.tar.gz: 084faba3c410b9793f0665b8ac1b780d704c599326fec582d4a42f9122e3745ee3f34dd60056dc94293acadaffda2d1a6826fa1cd21eb85ba9da58968f54c6e8
|
data/README.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
##### Dependency issues are welcome; please report them in the Issues section of this repo. Please include:
|
2
|
+
1. Your Operating System + architecture (Example: "Ubuntu 32 bits").
|
3
|
+
2. Full error backtrace.
|
4
|
+
3. Your Ruby version (you can see it by typing "ruby -v" in your command prompt).
|
5
|
+
|
1
6
|
# Janis
|
2
7
|
|
3
8
|
Janis will help you find proxy servers quickly, by grabbing them from a list of many (hopefully available and up-to-date) proxy listing websites. You can also tell Janis to parse from a specific website and it will do it if it knows how to. If it doesn't, you can improve it by adding new Parsers (more on this in the Usage section).
|
@@ -17,6 +22,17 @@ And then execute:
|
|
17
22
|
Or install it yourself as:
|
18
23
|
|
19
24
|
$ gem install janis
|
25
|
+
|
26
|
+
Then download the latest version of PhantomJS from http://phantomjs.org/download.html, according
|
27
|
+
to your platform.
|
28
|
+
|
29
|
+
Place the PhantomJS executable somewhere in your PATH.
|
30
|
+
|
31
|
+
On Unix, you can see your path from your shell by typing 'echo $PATH'.
|
32
|
+
Common folders to place the phantomjs binary in are /usr/bin and /usr/local/bin.
|
33
|
+
|
34
|
+
On Windows, you can consult your PATH from your system settings in the "Environment Variables" section.
|
35
|
+
C:\windows\system32\ is a common location you can place phantomjs.exe in.
|
20
36
|
|
21
37
|
## Usage
|
22
38
|
From your own script/app or from irb, require the gem with:
|
@@ -85,8 +101,6 @@ If there's a proxy listing website you consider reliable and up-to-date which yo
|
|
85
101
|
|
86
102
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
87
103
|
|
88
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
89
|
-
|
90
104
|
## Contributing
|
91
105
|
|
92
106
|
Bug reports and pull requests are welcome on GitHub at https://github.com/mgiagante/janis.
|
data/Rakefile
CHANGED
@@ -2,9 +2,9 @@ require "bundler/gem_tasks"
|
|
2
2
|
require "rake/testtask"
|
3
3
|
|
4
4
|
Rake::TestTask.new(:test) do |t|
|
5
|
-
t.libs << "
|
5
|
+
t.libs << "spec"
|
6
6
|
t.libs << "lib"
|
7
|
-
t.test_files = FileList['
|
7
|
+
t.test_files = FileList['spec/**/*_spec.rb']
|
8
8
|
end
|
9
9
|
|
10
10
|
task :default => :test
|
data/janis.gemspec
CHANGED
data/lib/janis.rb
CHANGED
@@ -1,37 +1,67 @@
|
|
1
1
|
require 'janis/version'
|
2
2
|
require 'janis/parsing'
|
3
|
-
require '
|
3
|
+
require 'janis/testing'
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
# TODO: Sites to be supported for scraping
|
8
|
+
# http://incloak.es/proxy-list/
|
9
|
+
# http://spys.ru/free-proxy-list/
|
10
|
+
# http://www.samair.ru/proxy/
|
11
|
+
# http://www.proxys.com.ar/
|
4
12
|
|
5
13
|
module Janis
|
6
14
|
|
7
15
|
IP_PORT_SEPARATOR = ':'
|
8
|
-
PROXY_LIST_PATH = File.dirname(__FILE__) + '/../proxy_server_list.yml'
|
9
16
|
|
10
|
-
def self.find(amount,
|
17
|
+
def self.find(amount, opts = {})
|
11
18
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
if results.size < amount
|
17
|
-
parsed_from_url = Parsing.parse(url) unless url.include?('#') # Elements should look like ["1.1.1.1:8080", "2.2.2.2:9090"]
|
18
|
-
results_from_this_url = parsed_from_url.map { |entry| convert_to_hash(entry) }
|
19
|
-
# Result should look like [ { ip: "1.1.1.1", port: "8080" }, { ip: "2.2.2.2", port: "9090" } ]
|
20
|
-
results += results_from_this_url
|
19
|
+
# Makes sure opts[:websites] is a subset of the supported websites. Otherwise, it takes the whole list.
|
20
|
+
if opts[:websites]
|
21
|
+
opts[:websites].each do |website|
|
22
|
+
raise "#{website} is not supported!" unless Janis.supported_websites.include?(website)
|
21
23
|
end
|
22
|
-
|
24
|
+
websites = opts[:websites]
|
25
|
+
else
|
26
|
+
websites = Janis.supported_websites
|
27
|
+
end
|
23
28
|
|
24
|
-
|
25
|
-
|
29
|
+
total_results = []
|
30
|
+
|
31
|
+
websites.each do |website|
|
32
|
+
if total_results.size < amount
|
33
|
+
new_results = Parsing.parse_from(website).map { |entry| build_proxy_hash(entry, website) }
|
34
|
+
total_results += new_results
|
35
|
+
end
|
36
|
+
end
|
37
|
+
opts[:criteria] ? Janis::Testing.filter_results(criteria, total_results[0..amount - 1]) : total_results[0..amount -1]
|
26
38
|
end
|
27
|
-
|
39
|
+
|
40
|
+
def self.supported_websites
|
41
|
+
Janis::Parsing::SpecificParsers::ProxyWebsiteParser.subclasses.map { |klass| self.website_name_for(klass.to_s)}
|
42
|
+
end
|
43
|
+
|
28
44
|
private
|
29
45
|
|
30
|
-
def self.
|
46
|
+
def self.build_proxy_hash(proxy_string, website)
|
31
47
|
{
|
32
48
|
ip: proxy_string.split(IP_PORT_SEPARATOR).first,
|
33
|
-
port: proxy_string.split(IP_PORT_SEPARATOR).last
|
49
|
+
port: proxy_string.split(IP_PORT_SEPARATOR).last,
|
50
|
+
source: website
|
34
51
|
}
|
35
52
|
end
|
36
53
|
|
54
|
+
#TODO: This should be probably moved to a name helper module
|
55
|
+
def self.website_name_for(parser_klass_name)
|
56
|
+
parser_klass_name.gsub(/::/, '/').
|
57
|
+
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
58
|
+
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
59
|
+
tr("-", "_").
|
60
|
+
gsub("_Parser","").
|
61
|
+
split('/').
|
62
|
+
last.
|
63
|
+
downcase.to_sym
|
64
|
+
#TODO: converts a parser class name to a :symbol_in_snake_case website name
|
65
|
+
end
|
66
|
+
|
37
67
|
end
|
data/lib/janis/parser_factory.rb
CHANGED
@@ -7,14 +7,23 @@ module Janis
|
|
7
7
|
|
8
8
|
class ParserFactory
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
10
|
+
attr_reader :parser_klasses
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@parser_klasses = Janis::Parsing::SpecificParsers::ProxyWebsiteParser.subclasses
|
14
|
+
end
|
15
|
+
|
16
|
+
def create_parser(website_name)
|
17
|
+
namespacing_prefix = "Janis::Parsing::SpecificParsers::"
|
18
|
+
@parser_klasses.find { |parser_klass| parser_klass.to_s == namespacing_prefix + parser_klass_name_for(website_name) }.new
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
#TODO: This should be probably moved to a name helper module
|
24
|
+
# website_name should be a :symbol_in_snake_lower_case. eg: :hide_my_ass will mean HideMyAssParser
|
25
|
+
def parser_klass_name_for(website_name)
|
26
|
+
website_name.to_s.split('_').map { |word| word.capitalize}.join + "Parser"
|
18
27
|
end
|
19
28
|
|
20
29
|
end
|
data/lib/janis/parsing.rb
CHANGED
@@ -4,6 +4,18 @@ module Janis
|
|
4
4
|
|
5
5
|
module SpecificParsers
|
6
6
|
|
7
|
+
class Proxy
|
8
|
+
|
9
|
+
def initialize(attribs = {})
|
10
|
+
@attribs = attribs
|
11
|
+
end
|
12
|
+
|
13
|
+
def method_missing?(message)
|
14
|
+
@attribs[message] || super
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
|
7
19
|
class ProxyWebsiteParser
|
8
20
|
|
9
21
|
attr_reader :url
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'net/ping'
|
2
|
+
|
3
|
+
module Janis
|
4
|
+
|
5
|
+
module Testing
|
6
|
+
|
7
|
+
def self.connectable?(proxy)
|
8
|
+
host, port = proxy.split(':')
|
9
|
+
return Net::Ping::TCP.new(host, port).ping
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.filter_results(criteria = [], results)
|
13
|
+
criteria.each do |criterion| # A criterion is a method that returns true or false about a proxy, like #connectable?
|
14
|
+
results.select! { |proxy| Janis::Testing.send(criterion, "#{proxy[:ip]}:#{proxy[:port]}") }
|
15
|
+
end
|
16
|
+
results
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
data/lib/janis/version.rb
CHANGED
metadata
CHANGED
@@ -1,97 +1,111 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: janis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mariano Giagante
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.10'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.10'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '10.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - '>='
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '5.4'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - '>='
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '5.4'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: pry
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - '>='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: nokogiri
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - '>='
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '1.6'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - '>='
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '1.6'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: poltergeist
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - '>='
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: net-ping
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
95
109
|
- !ruby/object:Gem::Version
|
96
110
|
version: '0'
|
97
111
|
description: It uses a source list with several tested websites to provide proxy servers
|
@@ -102,8 +116,8 @@ executables: []
|
|
102
116
|
extensions: []
|
103
117
|
extra_rdoc_files: []
|
104
118
|
files:
|
105
|
-
-
|
106
|
-
-
|
119
|
+
- .gitignore
|
120
|
+
- .travis.yml
|
107
121
|
- Gemfile
|
108
122
|
- LICENSE
|
109
123
|
- README.md
|
@@ -118,11 +132,10 @@ files:
|
|
118
132
|
- lib/janis/specific_parsers/hide_my_ass.rb
|
119
133
|
- lib/janis/specific_parsers/parsing_tools/capybara_with_phantom_js.rb
|
120
134
|
- lib/janis/specific_parsers/proxy-list_org.rb
|
121
|
-
- lib/janis/specific_parsers/simple.rb
|
122
135
|
- lib/janis/specific_parsers/template.rb
|
136
|
+
- lib/janis/testing.rb
|
123
137
|
- lib/janis/validations.rb
|
124
138
|
- lib/janis/version.rb
|
125
|
-
- proxy_server_list.yml
|
126
139
|
homepage: http://www.github.com/mgiagante/janis
|
127
140
|
licenses:
|
128
141
|
- MIT
|
@@ -133,12 +146,12 @@ require_paths:
|
|
133
146
|
- lib
|
134
147
|
required_ruby_version: !ruby/object:Gem::Requirement
|
135
148
|
requirements:
|
136
|
-
- -
|
149
|
+
- - '>='
|
137
150
|
- !ruby/object:Gem::Version
|
138
151
|
version: '0'
|
139
152
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
153
|
requirements:
|
141
|
-
- -
|
154
|
+
- - '>='
|
142
155
|
- !ruby/object:Gem::Version
|
143
156
|
version: '0'
|
144
157
|
requirements: []
|
@@ -148,4 +161,3 @@ signing_key:
|
|
148
161
|
specification_version: 4
|
149
162
|
summary: Janis grabs proxy servers from many websites for you!
|
150
163
|
test_files: []
|
151
|
-
has_rdoc:
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module Janis
|
2
|
-
|
3
|
-
module Parsing
|
4
|
-
|
5
|
-
module SpecificParsers
|
6
|
-
class SimpleParser < ProxyWebsiteParser
|
7
|
-
PROXY_REGEX = /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\:\d{1,5}/
|
8
|
-
def initialize
|
9
|
-
super
|
10
|
-
@html_doc = obtain_html_doc
|
11
|
-
end
|
12
|
-
def parse
|
13
|
-
@result ||= @html_doc.to_s.scan(PROXY_REGEX)
|
14
|
-
end
|
15
|
-
def self.url
|
16
|
-
'file://./test/html/simple.html'
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|