janis 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -2
- data/Rakefile +2 -2
- data/janis.gemspec +1 -0
- data/lib/janis.rb +48 -18
- data/lib/janis/parser_factory.rb +17 -8
- data/lib/janis/parsing.rb +2 -2
- data/lib/janis/proxy_website_parser.rb +12 -0
- data/lib/janis/specific_parsers/proxy-list_org.rb +1 -1
- data/lib/janis/testing.rb +21 -0
- data/lib/janis/version.rb +1 -1
- metadata +33 -21
- data/lib/janis/specific_parsers/simple.rb +0 -23
- data/proxy_server_list.yml +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d5f0f9b8e3e10fbb572bb3d39194c58edbeaea5b
|
4
|
+
data.tar.gz: eed32f6e8a3d1c1542286a024c26f9bd17a2e886
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c32861860e3bccbaf2027dc633eb56fc5ea3b6923a56f87792ab6df020e3a8b685ddc2d755e13aebfc6a8bf911130ccf48303cedadb3ec89657833e6aa00c167
|
7
|
+
data.tar.gz: 084faba3c410b9793f0665b8ac1b780d704c599326fec582d4a42f9122e3745ee3f34dd60056dc94293acadaffda2d1a6826fa1cd21eb85ba9da58968f54c6e8
|
data/README.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
##### Dependency issues are welcome to be reported in the Issues section of this repo. Please include:
|
2
|
+
1. Your Operating System + architecture (Example: "Ubuntu 32 bits").
|
3
|
+
2. Full error backtrace.
|
4
|
+
3. Your Ruby version (you can see it by typing "ruby -v" in your command prompt).
|
5
|
+
|
1
6
|
# Janis
|
2
7
|
|
3
8
|
Janis will help you find proxy servers quickly, by grabbing them from a list of many (hopefully available and up-to-date) proxy listing websites. You can also tell Janis to parse from a specific website and it will do it if it knows how to. If it doesn't you can improve it by adding new Parsers (more on this on Usage section).
|
@@ -17,6 +22,17 @@ And then execute:
|
|
17
22
|
Or install it yourself as:
|
18
23
|
|
19
24
|
$ gem install janis
|
25
|
+
|
26
|
+
Then download the latest version of PhantomJS from http://phantomjs.org/download.html, according
|
27
|
+
to your platform.
|
28
|
+
|
29
|
+
Place the PhantomJS executable somewhere in your PATH.
|
30
|
+
|
31
|
+
On Unix, you can see your PATH from your shell by typing 'echo $PATH'.
|
32
|
+
Common folders to place the phantomjs binary in are /usr/bin and /usr/local/bin.
|
33
|
+
|
34
|
+
On Windows, you can consult your PATH from your system settings in "Environment Variables" section.
|
35
|
+
C:\windows\system32\ is a common location you can place phantomjs.exe in.
|
20
36
|
|
21
37
|
## Usage
|
22
38
|
From your own script/app or from irb, require the gem with:
|
@@ -85,8 +101,6 @@ If there's a proxy listing website you consider reliable and up-to-date which yo
|
|
85
101
|
|
86
102
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
87
103
|
|
88
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
89
|
-
|
90
104
|
## Contributing
|
91
105
|
|
92
106
|
Bug reports and pull requests are welcome on GitHub at https://github.com/mgiagante/janis.
|
data/Rakefile
CHANGED
@@ -2,9 +2,9 @@ require "bundler/gem_tasks"
|
|
2
2
|
require "rake/testtask"
|
3
3
|
|
4
4
|
Rake::TestTask.new(:test) do |t|
|
5
|
-
t.libs << "
|
5
|
+
t.libs << "spec"
|
6
6
|
t.libs << "lib"
|
7
|
-
t.test_files = FileList['
|
7
|
+
t.test_files = FileList['spec/**/*_spec.rb']
|
8
8
|
end
|
9
9
|
|
10
10
|
task :default => :test
|
data/janis.gemspec
CHANGED
data/lib/janis.rb
CHANGED
@@ -1,37 +1,67 @@
|
|
1
1
|
require 'janis/version'
|
2
2
|
require 'janis/parsing'
|
3
|
-
require '
|
3
|
+
require 'janis/testing'
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
# TODO: Sites to be supported for scraping
|
8
|
+
# http://incloak.es/proxy-list/
|
9
|
+
# http://spys.ru/free-proxy-list/
|
10
|
+
# http://www.samair.ru/proxy/
|
11
|
+
# http://www.proxys.com.ar/
|
4
12
|
|
5
13
|
module Janis
|
6
14
|
|
7
15
|
IP_PORT_SEPARATOR = ':'
|
8
|
-
PROXY_LIST_PATH = File.dirname(__FILE__) + '/../proxy_server_list.yml'
|
9
16
|
|
10
|
-
def self.find(amount,
|
17
|
+
def self.find(amount, opts = {})
|
11
18
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
if results.size < amount
|
17
|
-
parsed_from_url = Parsing.parse(url) unless url.include?('#') # Elements should look like ["1.1.1.1:8080", "2.2.2.2:9090"]
|
18
|
-
results_from_this_url = parsed_from_url.map { |entry| convert_to_hash(entry) }
|
19
|
-
# Result should look like [ { ip: "1.1.1.1", port: "8080" }, { ip: "2.2.2.2", port: "9090" } ]
|
20
|
-
results += results_from_this_url
|
19
|
+
# Makes sure opts[:websites] is a subset of the supported websites. Otherwise, it takes the whole list.
|
20
|
+
if opts[:websites]
|
21
|
+
opts[:websites].each do |website|
|
22
|
+
raise "#{website} is not supported!" unless Janis.supported_websites.include?(website)
|
21
23
|
end
|
22
|
-
|
24
|
+
websites = opts[:websites]
|
25
|
+
else
|
26
|
+
websites = Janis.supported_websites
|
27
|
+
end
|
23
28
|
|
24
|
-
|
25
|
-
|
29
|
+
total_results = []
|
30
|
+
|
31
|
+
websites.each do |website|
|
32
|
+
if total_results.size < amount
|
33
|
+
new_results = Parsing.parse_from(website).map { |entry| build_proxy_hash(entry, website) }
|
34
|
+
total_results += new_results
|
35
|
+
end
|
36
|
+
end
|
37
|
+
opts[:criteria] ? Janis::Testing.filter_results(criteria, total_results[0..amount - 1]) : total_results[0..amount -1]
|
26
38
|
end
|
27
|
-
|
39
|
+
|
40
|
+
def self.supported_websites
|
41
|
+
Janis::Parsing::SpecificParsers::ProxyWebsiteParser.subclasses.map { |klass| self.website_name_for(klass.to_s)}
|
42
|
+
end
|
43
|
+
|
28
44
|
private
|
29
45
|
|
30
|
-
def self.
|
46
|
+
def self.build_proxy_hash(proxy_string, website)
|
31
47
|
{
|
32
48
|
ip: proxy_string.split(IP_PORT_SEPARATOR).first,
|
33
|
-
port: proxy_string.split(IP_PORT_SEPARATOR).last
|
49
|
+
port: proxy_string.split(IP_PORT_SEPARATOR).last,
|
50
|
+
source: website
|
34
51
|
}
|
35
52
|
end
|
36
53
|
|
54
|
+
#TODO: This should be probably moved to a name helper module
|
55
|
+
def self.website_name_for(parser_klass_name)
|
56
|
+
parser_klass_name.gsub(/::/, '/').
|
57
|
+
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
58
|
+
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
59
|
+
tr("-", "_").
|
60
|
+
gsub("_Parser","").
|
61
|
+
split('/').
|
62
|
+
last.
|
63
|
+
downcase.to_sym
|
64
|
+
#TODO: converts a parser class name to a :symbol_in_snake_case website name
|
65
|
+
end
|
66
|
+
|
37
67
|
end
|
data/lib/janis/parser_factory.rb
CHANGED
@@ -7,14 +7,23 @@ module Janis
|
|
7
7
|
|
8
8
|
class ParserFactory
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
10
|
+
attr_reader :parser_klasses
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@parser_klasses = Janis::Parsing::SpecificParsers::ProxyWebsiteParser.subclasses
|
14
|
+
end
|
15
|
+
|
16
|
+
def create_parser(website_name)
|
17
|
+
namespacing_prefix = "Janis::Parsing::SpecificParsers::"
|
18
|
+
@parser_klasses.find { |parser_klass| parser_klass.to_s == namespacing_prefix + parser_klass_name_for(website_name) }.new
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
#TODO: This should be probably moved to a name helper module
|
24
|
+
# website_name should be a :symbol_in_snake_lower_case. eg: :hide_my_ass will mean HideMyAssParser
|
25
|
+
def parser_klass_name_for(website_name)
|
26
|
+
website_name.to_s.split('_').map { |word| word.capitalize}.join + "Parser"
|
18
27
|
end
|
19
28
|
|
20
29
|
end
|
data/lib/janis/parsing.rb
CHANGED
@@ -4,6 +4,18 @@ module Janis
|
|
4
4
|
|
5
5
|
module SpecificParsers
|
6
6
|
|
7
|
+
class Proxy
|
8
|
+
|
9
|
+
def initialize(attribs = {})
|
10
|
+
@attribs = attribs
|
11
|
+
end
|
12
|
+
|
13
|
+
def method_missing?(message)
|
14
|
+
@attribs[message] || super
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
|
7
19
|
class ProxyWebsiteParser
|
8
20
|
|
9
21
|
attr_reader :url
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'net/ping'
|
2
|
+
|
3
|
+
module Janis
|
4
|
+
|
5
|
+
module Testing
|
6
|
+
|
7
|
+
def self.connectable?(proxy)
|
8
|
+
host, port = proxy.split(':')
|
9
|
+
return Net::Ping::TCP.new(host, port).ping
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.filter_results(criteria = [], results)
|
13
|
+
criteria.each do |criterion| # A criterion is a method that returns true or false about a proxy, like #connectable?
|
14
|
+
results.select! { |proxy| Janis::Testing.send(criterion, "#{proxy[:ip]}:#{proxy[:port]}") }
|
15
|
+
end
|
16
|
+
results
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
data/lib/janis/version.rb
CHANGED
metadata
CHANGED
@@ -1,97 +1,111 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: janis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mariano Giagante
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.10'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.10'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '10.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - '>='
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '5.4'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - '>='
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '5.4'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: pry
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - '>='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: nokogiri
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - '>='
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '1.6'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - '>='
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '1.6'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: poltergeist
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - '>='
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: net-ping
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
95
109
|
- !ruby/object:Gem::Version
|
96
110
|
version: '0'
|
97
111
|
description: It uses a source list with several testes websites to provide proxy servers
|
@@ -102,8 +116,8 @@ executables: []
|
|
102
116
|
extensions: []
|
103
117
|
extra_rdoc_files: []
|
104
118
|
files:
|
105
|
-
-
|
106
|
-
-
|
119
|
+
- .gitignore
|
120
|
+
- .travis.yml
|
107
121
|
- Gemfile
|
108
122
|
- LICENSE
|
109
123
|
- README.md
|
@@ -118,11 +132,10 @@ files:
|
|
118
132
|
- lib/janis/specific_parsers/hide_my_ass.rb
|
119
133
|
- lib/janis/specific_parsers/parsing_tools/capybara_with_phantom_js.rb
|
120
134
|
- lib/janis/specific_parsers/proxy-list_org.rb
|
121
|
-
- lib/janis/specific_parsers/simple.rb
|
122
135
|
- lib/janis/specific_parsers/template.rb
|
136
|
+
- lib/janis/testing.rb
|
123
137
|
- lib/janis/validations.rb
|
124
138
|
- lib/janis/version.rb
|
125
|
-
- proxy_server_list.yml
|
126
139
|
homepage: http://www.github.com/mgiagante/janis
|
127
140
|
licenses:
|
128
141
|
- MIT
|
@@ -133,12 +146,12 @@ require_paths:
|
|
133
146
|
- lib
|
134
147
|
required_ruby_version: !ruby/object:Gem::Requirement
|
135
148
|
requirements:
|
136
|
-
- -
|
149
|
+
- - '>='
|
137
150
|
- !ruby/object:Gem::Version
|
138
151
|
version: '0'
|
139
152
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
153
|
requirements:
|
141
|
-
- -
|
154
|
+
- - '>='
|
142
155
|
- !ruby/object:Gem::Version
|
143
156
|
version: '0'
|
144
157
|
requirements: []
|
@@ -148,4 +161,3 @@ signing_key:
|
|
148
161
|
specification_version: 4
|
149
162
|
summary: Janis grabs proxy servers from many websites for you!
|
150
163
|
test_files: []
|
151
|
-
has_rdoc:
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module Janis
|
2
|
-
|
3
|
-
module Parsing
|
4
|
-
|
5
|
-
module SpecificParsers
|
6
|
-
class SimpleParser < ProxyWebsiteParser
|
7
|
-
PROXY_REGEX = /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\:\d{1,5}/
|
8
|
-
def initialize
|
9
|
-
super
|
10
|
-
@html_doc = obtain_html_doc
|
11
|
-
end
|
12
|
-
def parse
|
13
|
-
@result ||= @html_doc.to_s.scan(PROXY_REGEX)
|
14
|
-
end
|
15
|
-
def self.url
|
16
|
-
'file://./test/html/simple.html'
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|