ids_please 1.1.4 → 2.0.0.beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ids_please.gemspec +1 -1
- data/lib/ids_please/grabbers/base.rb +39 -0
- data/lib/ids_please/grabbers/facebook.rb +30 -0
- data/lib/ids_please/grabbers/instagram.rb +34 -0
- data/lib/ids_please/grabbers/vkontakte.rb +27 -0
- data/lib/ids_please/grabbers.rb +25 -0
- data/lib/ids_please/parsers/ameba.rb +9 -0
- data/lib/ids_please/parsers/base.rb +31 -0
- data/lib/ids_please/parsers/blogger.rb +26 -0
- data/lib/ids_please/parsers/facebook.rb +25 -0
- data/lib/ids_please/parsers/google_plus.rb +29 -0
- data/lib/ids_please/parsers/hi5.rb +23 -0
- data/lib/ids_please/parsers/instagram.rb +9 -0
- data/lib/ids_please/parsers/linkedin.rb +25 -0
- data/lib/ids_please/parsers/livejournal.rb +20 -0
- data/lib/ids_please/parsers/moikrug.rb +24 -0
- data/lib/ids_please/parsers/odnoklassniki.rb +23 -0
- data/lib/ids_please/parsers/pinterest.rb +9 -0
- data/lib/ids_please/parsers/reddit.rb +17 -0
- data/lib/ids_please/parsers/soundcloud.rb +9 -0
- data/lib/ids_please/parsers/tumblr.rb +18 -0
- data/lib/ids_please/parsers/twitter.rb +22 -0
- data/lib/ids_please/parsers/vimeo.rb +9 -0
- data/lib/ids_please/parsers/vkontakte.rb +26 -0
- data/lib/ids_please/parsers/youtube.rb +21 -0
- data/lib/ids_please/parsers.rb +59 -0
- data/lib/ids_please/version.rb +3 -0
- data/lib/ids_please.rb +39 -49
- metadata +30 -23
- data/lib/ids_please/ameba.rb +0 -7
- data/lib/ids_please/base_parser.rb +0 -29
- data/lib/ids_please/blogger.rb +0 -25
- data/lib/ids_please/facebook.rb +0 -23
- data/lib/ids_please/google_plus.rb +0 -27
- data/lib/ids_please/hi5.rb +0 -21
- data/lib/ids_please/instagram.rb +0 -7
- data/lib/ids_please/linkedin.rb +0 -23
- data/lib/ids_please/livejournal.rb +0 -18
- data/lib/ids_please/moikrug.rb +0 -22
- data/lib/ids_please/odnoklassniki.rb +0 -21
- data/lib/ids_please/pinterest.rb +0 -7
- data/lib/ids_please/reddit.rb +0 -15
- data/lib/ids_please/soundcloud.rb +0 -7
- data/lib/ids_please/tumblr.rb +0 -16
- data/lib/ids_please/twitter.rb +0 -20
- data/lib/ids_please/vimeo.rb +0 -7
- data/lib/ids_please/vkontakte.rb +0 -24
- data/lib/ids_please/youtube.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 469bb6ff0049182ad08b50e28e3fd2512242053b
|
4
|
+
data.tar.gz: f86b0623719dc70dbda4f146290461c733854dad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 833e28d637f3e3b475564a85417205798b847acb6c36ac6479add5941fd2d578d8c87b3c670d79b71f852056b7e6198069d027548563a84946bd6403e91ee6ed
|
7
|
+
data.tar.gz: 5262f8ed1a10f7e6330a29c73fc86c60fdd434d919c1ae2aaec4e03deec23cf16d4552819652ce11f6e263019daa79113c2277956dffadfd772a8d10f84982c1
|
data/ids_please.gemspec
CHANGED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'byebug'
|
3
|
+
|
4
|
+
class IdsPlease
|
5
|
+
module Grabbers
|
6
|
+
class Base
|
7
|
+
|
8
|
+
def self.interact(links)
|
9
|
+
links.map { |l| self.new(l).grab_link }
|
10
|
+
end
|
11
|
+
|
12
|
+
attr_reader :avatar, :display_name, :username, :link, :page_source, :network_id, :data
|
13
|
+
|
14
|
+
def initialize(link)
|
15
|
+
@link = link
|
16
|
+
end
|
17
|
+
|
18
|
+
def grab_link(link)
|
19
|
+
throw 'Base grabber can not grab anything'
|
20
|
+
end
|
21
|
+
|
22
|
+
def to_s
|
23
|
+
line = ''
|
24
|
+
self.instance_variables.each do |iv|
|
25
|
+
next if iv == :@page_source
|
26
|
+
val = self.instance_variable_get(iv)
|
27
|
+
next if val.nil? || val == ''
|
28
|
+
line += ", #{iv}=#{val}"
|
29
|
+
end
|
30
|
+
"#{self.class}##{self.object_id} #{line[1..-1]}"
|
31
|
+
end
|
32
|
+
|
33
|
+
def inspect
|
34
|
+
to_s
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Grabbers
|
3
|
+
class Facebook < IdsPlease::Grabbers::Base
|
4
|
+
|
5
|
+
def grab_link
|
6
|
+
@page_source ||= open(link).read
|
7
|
+
@network_id = @page_source.scan(/entity_id":"(\d+)"/).flatten.first
|
8
|
+
@avatar = @page_source.scan(/og:image" content="([^"]+)"/).flatten.first
|
9
|
+
@display_name = @page_source.scan(/og:title" content="([^"]+)"/).flatten.first
|
10
|
+
@username = @page_source.scan(/og:url" content=".+\/([^\/"]+)"/).flatten.first
|
11
|
+
@avatar = CGI.unescapeHTML(@avatar.encode('utf-8')) if @avatar
|
12
|
+
@display_name = CGI.unescapeHTML(@display_name.encode('utf-8')) if @display_name
|
13
|
+
@data = {}
|
14
|
+
{
|
15
|
+
type: @page_source.scan(/og:type" content="([^"]+)"/).flatten.first.encode('utf-8'),
|
16
|
+
description: @page_source.scan(/og:description" content="([^"]+)"/).flatten.first.encode('utf-8')
|
17
|
+
}.each do |k, v|
|
18
|
+
next if v.nil? || v == ''
|
19
|
+
@data[k] = CGI.unescapeHTML(v)
|
20
|
+
end
|
21
|
+
self
|
22
|
+
rescue => e
|
23
|
+
byebug
|
24
|
+
p e
|
25
|
+
return self
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
class IdsPlease
|
4
|
+
module Grabbers
|
5
|
+
class Instagram < IdsPlease::Grabbers::Base
|
6
|
+
|
7
|
+
def grab_link
|
8
|
+
@page_source ||= open(link).read
|
9
|
+
@network_id = @page_source.scan(/"user":{.+"id":"(\d+)"/).flatten.first
|
10
|
+
@avatar = @page_source.scan(/"user":{.+"profile_picture":"([^"]+)"/).flatten.first.gsub('\\', '')
|
11
|
+
@display_name = @page_source.scan(/"user":{.+"full_name":"([^"]+)"/).flatten.first
|
12
|
+
@username = @page_source.scan(/"user":{.+"username":"([^"]+)"/).flatten.first.gsub('\\', '')
|
13
|
+
counts = @page_source.scan(/"user":{.+"counts":({[^}]+})/).flatten.first
|
14
|
+
counts = JSON.parse counts
|
15
|
+
@data = {}
|
16
|
+
{
|
17
|
+
bio: @page_source.scan(/"user":{.+"bio":"([^"]+)"/).flatten.first,
|
18
|
+
website: @page_source.scan(/"user":{.+"website":"([^"]+)"/).flatten.first.gsub('\\', ''),
|
19
|
+
counts: counts
|
20
|
+
}.each do |k, v|
|
21
|
+
next if v.nil? || v == '' || !v.is_a?(String)
|
22
|
+
@data[k] = v.gsub(/\\u([\da-fA-F]{4})/) {|m| [$1].pack("H*").unpack("n*").pack("U*")}
|
23
|
+
end
|
24
|
+
@display_name = @display_name.gsub(/\\u([\da-fA-F]{4})/) {|m| [$1].pack("H*").unpack("n*").pack("U*")}
|
25
|
+
self
|
26
|
+
rescue => e
|
27
|
+
byebug
|
28
|
+
p e
|
29
|
+
return self
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Grabbers
|
3
|
+
class Vkontakte < IdsPlease::Grabbers::Base
|
4
|
+
|
5
|
+
def grab_link
|
6
|
+
agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36'
|
7
|
+
@page_source ||= open(link, 'User-Agent' => agent).read.encode('utf-8')
|
8
|
+
@network_id = @page_source.scan(/href="\/wall(-\d+)_/).flatten.first
|
9
|
+
@username = @link.to_s.split('vk.com/').last.gsub('/', '')
|
10
|
+
@avatar = @page_source.scan(/page_avatar.+\n.+src="([^"]+)/).flatten.first
|
11
|
+
@avatar = CGI.unescapeHTML(@avatar) if @avatar
|
12
|
+
@display_name = @page_source.scan(/page_name">([^<]+)/).flatten.first
|
13
|
+
@display_name = CGI.unescapeHTML(@display_name) if @display_name
|
14
|
+
@data = {
|
15
|
+
description: @page_source.scan(/description" content="([^"]+)/).flatten.first
|
16
|
+
}
|
17
|
+
@data[:description] = CGI.unescapeHTML(@data[:description]) if @data[:description]
|
18
|
+
self
|
19
|
+
rescue => e
|
20
|
+
byebug
|
21
|
+
p e
|
22
|
+
return self
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require_relative 'grabbers/base'
|
2
|
+
require_relative 'grabbers/facebook'
|
3
|
+
require_relative 'grabbers/vkontakte'
|
4
|
+
require_relative 'grabbers/instagram'
|
5
|
+
|
6
|
+
class IdsPlease
|
7
|
+
module Grabbers
|
8
|
+
|
9
|
+
NETWORKS = [
|
10
|
+
IdsPlease::Grabbers::Facebook,
|
11
|
+
IdsPlease::Grabbers::Vkontakte,
|
12
|
+
IdsPlease::Grabbers::Instagram
|
13
|
+
]
|
14
|
+
|
15
|
+
def self.each
|
16
|
+
NETWORKS.each { |n| yield n }
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.by_symbol(sym)
|
20
|
+
klass_name = "#{sym.to_s[0].upcase}#{sym.to_s[1..-1]}"
|
21
|
+
self.const_get(klass_name)
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Base
|
4
|
+
|
5
|
+
class << self
|
6
|
+
def to_sym
|
7
|
+
self.name.split('::').last.downcase.to_sym
|
8
|
+
end
|
9
|
+
|
10
|
+
def interact(links)
|
11
|
+
links.map do |l|
|
12
|
+
id = parse_link(l)
|
13
|
+
matched_id = id.match(valid_id_regex) if id
|
14
|
+
matched_id[1] if matched_id
|
15
|
+
end.compact
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def parse_link(link)
|
21
|
+
link.path.split('/')[1]
|
22
|
+
end
|
23
|
+
|
24
|
+
def valid_id_regex
|
25
|
+
/\A([\w\.\+-]{2,})/
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Blogger < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /blogspot|blogger/i
|
6
|
+
|
7
|
+
def self.interact(links)
|
8
|
+
links.map do |link|
|
9
|
+
parse_link(link)
|
10
|
+
end.compact
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.parse_link(link)
|
14
|
+
query = CGI.parse(link.query) if link.query && !link.query.empty?
|
15
|
+
|
16
|
+
if query && !query['blogID'].empty?
|
17
|
+
query['blogID'].first.split('#').first
|
18
|
+
else
|
19
|
+
return if link.host.sub('.blogspot.com', '') == link.host
|
20
|
+
link.host.sub('.blogspot.com', '')
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Facebook < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /fb\.me|fb\.com|facebook/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
private
|
9
|
+
|
10
|
+
def parse_link(link)
|
11
|
+
query = CGI.parse(link.query) if link.query && !link.query.empty?
|
12
|
+
|
13
|
+
if query && !query['id'].empty?
|
14
|
+
query['id'].first
|
15
|
+
elsif link.path =~ /\/pages\//
|
16
|
+
link.path.split('/').last
|
17
|
+
else
|
18
|
+
link.path.split('/')[1]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class GooglePlus < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /google/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
def to_sym
|
9
|
+
:google_plus
|
10
|
+
end
|
11
|
+
|
12
|
+
def interact(links)
|
13
|
+
links.map { |l| parse_link(l) }.compact
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def parse_link(link)
|
19
|
+
if matched = link.path.match(/\/(\+\w+)/)
|
20
|
+
matched[1]
|
21
|
+
elsif matched = link.path.match(/\/(\d{2,})/)
|
22
|
+
matched[1]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Hi5 < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /hi5/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
private
|
9
|
+
|
10
|
+
def parse_link(link)
|
11
|
+
query = CGI.parse(link.query) if link.query && !link.query.empty?
|
12
|
+
|
13
|
+
if query && !query['uid'].empty?
|
14
|
+
query['uid'].first
|
15
|
+
else
|
16
|
+
link.path.split('/')[1]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Linkedin < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /linkedin/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
private
|
9
|
+
|
10
|
+
def parse_link(link)
|
11
|
+
query = CGI.parse(link.query) if link.query && !link.query.empty?
|
12
|
+
|
13
|
+
if query && !query['id'].empty?
|
14
|
+
query['id'].first
|
15
|
+
elsif link.path =~ /\/in\//
|
16
|
+
link.path.split('/')[2]
|
17
|
+
elsif link.path =~ /\/company\//
|
18
|
+
link.path.split('/')[2]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Livejournal < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /livejournal/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
private
|
9
|
+
|
10
|
+
def parse_link(link)
|
11
|
+
parsed = link.host.sub('.livejournal.com', '')
|
12
|
+
parsed = link.host.split('.livejournal').first if parsed == link.host
|
13
|
+
return if parsed == link.host
|
14
|
+
parsed
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Moikrug < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /moikrug/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
def interact(links)
|
9
|
+
links.map do |link|
|
10
|
+
next if link.host.sub('.moikrug.ru', '') == link.host
|
11
|
+
parse_link(link)
|
12
|
+
end.compact
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def parse_link(link)
|
18
|
+
link.host.sub('.moikrug.ru', '')
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Odnoklassniki < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /odnoklassniki|ok\.ru/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
private
|
9
|
+
|
10
|
+
def parse_link(link)
|
11
|
+
if matched = link.path.match(/\/(\d{2,})/)
|
12
|
+
matched[1]
|
13
|
+
elsif link.path =~ /\/about\//
|
14
|
+
link.path.split('/')[-2]
|
15
|
+
elsif link.path.split('/').size >= 3
|
16
|
+
link.path.split('/')[2]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Tumblr < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /tumblr/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
private
|
9
|
+
|
10
|
+
def parse_link(link)
|
11
|
+
return if link.host.sub('.tumblr.com', '') == link.host
|
12
|
+
link.host.sub('.tumblr.com', '')
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Twitter < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /twitter/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
private
|
9
|
+
|
10
|
+
def parse_link(link)
|
11
|
+
if link.path =~ /%23!/
|
12
|
+
id = link.path.sub(/\A\/%23!\//, '')
|
13
|
+
id.split(/[\/\?#]/).first
|
14
|
+
else
|
15
|
+
super
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Vkontakte < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /vk\.com|vkontakte/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
def interact(links)
|
9
|
+
links.map { |l| parse_link(l) }.compact
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def parse_link(link)
|
15
|
+
if link.path =~ /id|club|public/
|
16
|
+
id = link.path.sub(/\A\/id|\A\/club|\A\/public/, '')
|
17
|
+
id.split(/[\/\?#]/).first
|
18
|
+
else
|
19
|
+
super
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module Parsers
|
3
|
+
class Youtube < IdsPlease::Parsers::Base
|
4
|
+
|
5
|
+
MASK = /youtu\.be|youtube/i
|
6
|
+
|
7
|
+
class << self
|
8
|
+
private
|
9
|
+
|
10
|
+
def parse_link(link)
|
11
|
+
if link.path =~ /channels|user/
|
12
|
+
link.path.split('/')[2]
|
13
|
+
else
|
14
|
+
link.path.split('/')[1]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require_relative 'parsers/base'
|
2
|
+
require_relative 'parsers/facebook'
|
3
|
+
require_relative 'parsers/google_plus'
|
4
|
+
require_relative 'parsers/instagram'
|
5
|
+
require_relative 'parsers/blogger'
|
6
|
+
require_relative 'parsers/ameba'
|
7
|
+
require_relative 'parsers/hi5'
|
8
|
+
require_relative 'parsers/livejournal'
|
9
|
+
require_relative 'parsers/linkedin'
|
10
|
+
require_relative 'parsers/pinterest'
|
11
|
+
require_relative 'parsers/reddit'
|
12
|
+
require_relative 'parsers/twitter'
|
13
|
+
require_relative 'parsers/tumblr'
|
14
|
+
require_relative 'parsers/vimeo'
|
15
|
+
require_relative 'parsers/youtube'
|
16
|
+
require_relative 'parsers/soundcloud'
|
17
|
+
require_relative 'parsers/vkontakte'
|
18
|
+
require_relative 'parsers/odnoklassniki'
|
19
|
+
require_relative 'parsers/moikrug'
|
20
|
+
|
21
|
+
class IdsPlease
|
22
|
+
module Parsers
|
23
|
+
|
24
|
+
NETWORKS = [
|
25
|
+
IdsPlease::Parsers::GooglePlus,
|
26
|
+
IdsPlease::Parsers::Vkontakte,
|
27
|
+
IdsPlease::Parsers::Twitter,
|
28
|
+
IdsPlease::Parsers::Facebook,
|
29
|
+
IdsPlease::Parsers::Instagram,
|
30
|
+
IdsPlease::Parsers::Blogger,
|
31
|
+
IdsPlease::Parsers::Ameba,
|
32
|
+
IdsPlease::Parsers::Hi5,
|
33
|
+
IdsPlease::Parsers::Linkedin,
|
34
|
+
IdsPlease::Parsers::Livejournal,
|
35
|
+
IdsPlease::Parsers::Reddit,
|
36
|
+
IdsPlease::Parsers::Pinterest,
|
37
|
+
IdsPlease::Parsers::Soundcloud,
|
38
|
+
IdsPlease::Parsers::Vimeo,
|
39
|
+
IdsPlease::Parsers::Youtube,
|
40
|
+
IdsPlease::Parsers::Odnoklassniki,
|
41
|
+
IdsPlease::Parsers::Tumblr,
|
42
|
+
IdsPlease::Parsers::Moikrug
|
43
|
+
]
|
44
|
+
|
45
|
+
def self.each
|
46
|
+
NETWORKS.each { |n| yield n }
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.by_symbol(sym)
|
50
|
+
if sym == :google_plus
|
51
|
+
IdsPlease::Parsers::GooglePlus
|
52
|
+
else
|
53
|
+
klass_name = "#{sym.to_s[0].upcase}#{sym.to_s[1..-1]}"
|
54
|
+
self.const_get(klass_name)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
59
|
+
end
|
data/lib/ids_please.rb
CHANGED
@@ -1,54 +1,22 @@
|
|
1
1
|
require 'uri'
|
2
2
|
require 'cgi'
|
3
|
-
require_relative 'ids_please/
|
4
|
-
require_relative 'ids_please/
|
5
|
-
require_relative 'ids_please/google_plus'
|
6
|
-
require_relative 'ids_please/instagram'
|
7
|
-
require_relative 'ids_please/blogger'
|
8
|
-
require_relative 'ids_please/ameba'
|
9
|
-
require_relative 'ids_please/hi5'
|
10
|
-
require_relative 'ids_please/livejournal'
|
11
|
-
require_relative 'ids_please/linkedin'
|
12
|
-
require_relative 'ids_please/pinterest'
|
13
|
-
require_relative 'ids_please/reddit'
|
14
|
-
require_relative 'ids_please/twitter'
|
15
|
-
require_relative 'ids_please/tumblr'
|
16
|
-
require_relative 'ids_please/vimeo'
|
17
|
-
require_relative 'ids_please/youtube'
|
18
|
-
require_relative 'ids_please/soundcloud'
|
19
|
-
require_relative 'ids_please/vkontakte'
|
20
|
-
require_relative 'ids_please/odnoklassniki'
|
21
|
-
require_relative 'ids_please/moikrug'
|
3
|
+
require_relative 'ids_please/parsers'
|
4
|
+
require_relative 'ids_please/grabbers'
|
22
5
|
|
23
6
|
class IdsPlease
|
24
7
|
|
25
|
-
|
8
|
+
def self.parsers
|
9
|
+
IdsPlease::Parsers
|
10
|
+
end
|
26
11
|
|
27
|
-
|
12
|
+
def self.grabbers
|
13
|
+
IdsPlease::Grabbers
|
14
|
+
end
|
28
15
|
|
29
|
-
|
30
|
-
IdsPlease::GooglePlus,
|
31
|
-
IdsPlease::Vkontakte,
|
32
|
-
IdsPlease::Twitter,
|
33
|
-
IdsPlease::Facebook,
|
34
|
-
IdsPlease::Instagram,
|
35
|
-
IdsPlease::Blogger,
|
36
|
-
IdsPlease::Ameba,
|
37
|
-
IdsPlease::Hi5,
|
38
|
-
IdsPlease::Linkedin,
|
39
|
-
IdsPlease::Livejournal,
|
40
|
-
IdsPlease::Reddit,
|
41
|
-
IdsPlease::Pinterest,
|
42
|
-
IdsPlease::Soundcloud,
|
43
|
-
IdsPlease::Vimeo,
|
44
|
-
IdsPlease::Youtube,
|
45
|
-
IdsPlease::Odnoklassniki,
|
46
|
-
IdsPlease::Tumblr,
|
47
|
-
IdsPlease::Moikrug
|
48
|
-
]
|
16
|
+
attr_accessor :original, :unrecognized, :parsed, :grabbed
|
49
17
|
|
50
18
|
def initialize(*args)
|
51
|
-
@original = args.dup
|
19
|
+
@original = args.flatten.dup
|
52
20
|
end
|
53
21
|
|
54
22
|
def recognize
|
@@ -62,22 +30,44 @@ class IdsPlease
|
|
62
30
|
end
|
63
31
|
|
64
32
|
def parse
|
33
|
+
interact(:parsers)
|
34
|
+
end
|
35
|
+
|
36
|
+
def grab
|
37
|
+
interact(:grabbers)
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def interact(interactors = :parsers)
|
65
43
|
recognize
|
66
|
-
|
67
|
-
@recognized.each do |
|
68
|
-
|
44
|
+
interacted = Hash.new { |hash, network| hash[network] = [] }
|
45
|
+
@recognized.each do |network, links|
|
46
|
+
interactor = IdsPlease.send(interactors).by_symbol(network)
|
47
|
+
interacted[network].concat interactor.interact(links)
|
69
48
|
end
|
49
|
+
self.instance_variable_set(interacted_var(interactors), interacted)
|
50
|
+
|
51
|
+
interacted
|
70
52
|
end
|
71
53
|
|
72
|
-
|
54
|
+
def interacted_var(interactors)
|
55
|
+
if interactors == :parsers
|
56
|
+
:@parsed
|
57
|
+
elsif interactors == :grabbers
|
58
|
+
:@grabbed
|
59
|
+
else
|
60
|
+
throw 'Wrong interactors type'
|
61
|
+
end
|
62
|
+
end
|
73
63
|
|
74
64
|
def recognize_link(link)
|
75
65
|
link = "http://#{link}" unless link =~ /\Ahttps?:\/\//
|
76
66
|
parsed_link = URI(URI.encode(link))
|
77
|
-
|
67
|
+
IdsPlease::Parsers.each do |network|
|
78
68
|
if parsed_link.host =~ network::MASK
|
79
|
-
@recognized[network] ||= []
|
80
|
-
@recognized[network] << parsed_link
|
69
|
+
@recognized[network.to_sym] ||= []
|
70
|
+
@recognized[network.to_sym] << parsed_link
|
81
71
|
return
|
82
72
|
end
|
83
73
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ids_please
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0.beta
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- gazay
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -53,25 +53,32 @@ files:
|
|
53
53
|
- Rakefile
|
54
54
|
- ids_please.gemspec
|
55
55
|
- lib/ids_please.rb
|
56
|
-
- lib/ids_please/
|
57
|
-
- lib/ids_please/
|
58
|
-
- lib/ids_please/
|
59
|
-
- lib/ids_please/
|
60
|
-
- lib/ids_please/
|
61
|
-
- lib/ids_please/
|
62
|
-
- lib/ids_please/
|
63
|
-
- lib/ids_please/
|
64
|
-
- lib/ids_please/
|
65
|
-
- lib/ids_please/
|
66
|
-
- lib/ids_please/
|
67
|
-
- lib/ids_please/
|
68
|
-
- lib/ids_please/
|
69
|
-
- lib/ids_please/
|
70
|
-
- lib/ids_please/
|
71
|
-
- lib/ids_please/
|
72
|
-
- lib/ids_please/
|
73
|
-
- lib/ids_please/
|
74
|
-
- lib/ids_please/
|
56
|
+
- lib/ids_please/grabbers.rb
|
57
|
+
- lib/ids_please/grabbers/base.rb
|
58
|
+
- lib/ids_please/grabbers/facebook.rb
|
59
|
+
- lib/ids_please/grabbers/instagram.rb
|
60
|
+
- lib/ids_please/grabbers/vkontakte.rb
|
61
|
+
- lib/ids_please/parsers.rb
|
62
|
+
- lib/ids_please/parsers/ameba.rb
|
63
|
+
- lib/ids_please/parsers/base.rb
|
64
|
+
- lib/ids_please/parsers/blogger.rb
|
65
|
+
- lib/ids_please/parsers/facebook.rb
|
66
|
+
- lib/ids_please/parsers/google_plus.rb
|
67
|
+
- lib/ids_please/parsers/hi5.rb
|
68
|
+
- lib/ids_please/parsers/instagram.rb
|
69
|
+
- lib/ids_please/parsers/linkedin.rb
|
70
|
+
- lib/ids_please/parsers/livejournal.rb
|
71
|
+
- lib/ids_please/parsers/moikrug.rb
|
72
|
+
- lib/ids_please/parsers/odnoklassniki.rb
|
73
|
+
- lib/ids_please/parsers/pinterest.rb
|
74
|
+
- lib/ids_please/parsers/reddit.rb
|
75
|
+
- lib/ids_please/parsers/soundcloud.rb
|
76
|
+
- lib/ids_please/parsers/tumblr.rb
|
77
|
+
- lib/ids_please/parsers/twitter.rb
|
78
|
+
- lib/ids_please/parsers/vimeo.rb
|
79
|
+
- lib/ids_please/parsers/vkontakte.rb
|
80
|
+
- lib/ids_please/parsers/youtube.rb
|
81
|
+
- lib/ids_please/version.rb
|
75
82
|
- spec/ids_please/basic_spec.rb
|
76
83
|
- spec/spec_helper.rb
|
77
84
|
homepage: http://github.com/gazay/ids_please
|
@@ -90,9 +97,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
90
97
|
version: '0'
|
91
98
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
99
|
requirements:
|
93
|
-
- - "
|
100
|
+
- - ">"
|
94
101
|
- !ruby/object:Gem::Version
|
95
|
-
version:
|
102
|
+
version: 1.3.1
|
96
103
|
requirements: []
|
97
104
|
rubyforge_project:
|
98
105
|
rubygems_version: 2.4.5
|
data/lib/ids_please/ameba.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class BaseParser
|
3
|
-
|
4
|
-
class << self
|
5
|
-
def to_sym
|
6
|
-
self.name.split('::').last.downcase.to_sym
|
7
|
-
end
|
8
|
-
|
9
|
-
def parse(links)
|
10
|
-
links.map do |l|
|
11
|
-
id = parse_link(l)
|
12
|
-
matched_id = id.match(valid_id_regex) if id
|
13
|
-
matched_id[1] if matched_id
|
14
|
-
end.compact
|
15
|
-
end
|
16
|
-
|
17
|
-
private
|
18
|
-
|
19
|
-
def parse_link(link)
|
20
|
-
link.path.split('/')[1]
|
21
|
-
end
|
22
|
-
|
23
|
-
def valid_id_regex
|
24
|
-
/\A([\w\.\+-]{2,})/
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
29
|
-
end
|
data/lib/ids_please/blogger.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Blogger < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /blogspot|blogger/i
|
5
|
-
|
6
|
-
def self.parse(links)
|
7
|
-
links.map do |link|
|
8
|
-
parse_link(link)
|
9
|
-
end.compact
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.parse_link(link)
|
13
|
-
query = CGI.parse(link.query) if link.query && !link.query.empty?
|
14
|
-
|
15
|
-
if query && !query['blogID'].empty?
|
16
|
-
query['blogID'].first.split('#').first
|
17
|
-
else
|
18
|
-
return if link.host.sub('.blogspot.com', '') == link.host
|
19
|
-
link.host.sub('.blogspot.com', '')
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
data/lib/ids_please/facebook.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Facebook < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /fb\.me|fb\.com|facebook/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
private
|
8
|
-
|
9
|
-
def parse_link(link)
|
10
|
-
query = CGI.parse(link.query) if link.query && !link.query.empty?
|
11
|
-
|
12
|
-
if query && !query['id'].empty?
|
13
|
-
query['id'].first
|
14
|
-
elsif link.path =~ /\/pages\//
|
15
|
-
link.path.split('/').last
|
16
|
-
else
|
17
|
-
link.path.split('/')[1]
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
23
|
-
end
|
@@ -1,27 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class GooglePlus < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /google/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
def to_sym
|
8
|
-
:google_plus
|
9
|
-
end
|
10
|
-
|
11
|
-
def parse(links)
|
12
|
-
links.map { |l| parse_link(l) }.compact
|
13
|
-
end
|
14
|
-
|
15
|
-
private
|
16
|
-
|
17
|
-
def parse_link(link)
|
18
|
-
if matched = link.path.match(/\/(\+\w+)/)
|
19
|
-
matched[1]
|
20
|
-
elsif matched = link.path.match(/\/(\d{2,})/)
|
21
|
-
matched[1]
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|
27
|
-
end
|
data/lib/ids_please/hi5.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Hi5 < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /hi5/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
private
|
8
|
-
|
9
|
-
def parse_link(link)
|
10
|
-
query = CGI.parse(link.query) if link.query && !link.query.empty?
|
11
|
-
|
12
|
-
if query && !query['uid'].empty?
|
13
|
-
query['uid'].first
|
14
|
-
else
|
15
|
-
link.path.split('/')[1]
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
end
|
21
|
-
end
|
data/lib/ids_please/instagram.rb
DELETED
data/lib/ids_please/linkedin.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Linkedin < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /linkedin/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
private
|
8
|
-
|
9
|
-
def parse_link(link)
|
10
|
-
query = CGI.parse(link.query) if link.query && !link.query.empty?
|
11
|
-
|
12
|
-
if query && !query['id'].empty?
|
13
|
-
query['id'].first
|
14
|
-
elsif link.path =~ /\/in\//
|
15
|
-
link.path.split('/')[2]
|
16
|
-
elsif link.path =~ /\/company\//
|
17
|
-
link.path.split('/')[2]
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
23
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Livejournal < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /livejournal/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
private
|
8
|
-
|
9
|
-
def parse_link(link)
|
10
|
-
parsed = link.host.sub('.livejournal.com', '')
|
11
|
-
parsed = link.host.split('.livejournal').first if parsed == link.host
|
12
|
-
return if parsed == link.host
|
13
|
-
parsed
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
18
|
-
end
|
data/lib/ids_please/moikrug.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Moikrug < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /moikrug/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
def parse(links)
|
8
|
-
links.map do |link|
|
9
|
-
next if link.host.sub('.moikrug.ru', '') == link.host
|
10
|
-
parse_link(link)
|
11
|
-
end.compact
|
12
|
-
end
|
13
|
-
|
14
|
-
private
|
15
|
-
|
16
|
-
def parse_link(link)
|
17
|
-
link.host.sub('.moikrug.ru', '')
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Odnoklassniki < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /odnoklassniki|ok\.ru/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
private
|
8
|
-
|
9
|
-
def parse_link(link)
|
10
|
-
if matched = link.path.match(/\/(\d{2,})/)
|
11
|
-
matched[1]
|
12
|
-
elsif link.path =~ /\/about\//
|
13
|
-
link.path.split('/')[-2]
|
14
|
-
elsif link.path.split('/').size >= 3
|
15
|
-
link.path.split('/')[2]
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
end
|
21
|
-
end
|
data/lib/ids_please/pinterest.rb
DELETED
data/lib/ids_please/reddit.rb
DELETED
data/lib/ids_please/tumblr.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Tumblr < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /tumblr/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
private
|
8
|
-
|
9
|
-
def parse_link(link)
|
10
|
-
return if link.host.sub('.tumblr.com', '') == link.host
|
11
|
-
link.host.sub('.tumblr.com', '')
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
end
|
16
|
-
end
|
data/lib/ids_please/twitter.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Twitter < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /twitter/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
private
|
8
|
-
|
9
|
-
def parse_link(link)
|
10
|
-
if link.path =~ /%23!/
|
11
|
-
id = link.path.sub(/\A\/%23!\//, '')
|
12
|
-
id.split(/[\/\?#]/).first
|
13
|
-
else
|
14
|
-
super
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
20
|
-
end
|
data/lib/ids_please/vimeo.rb
DELETED
data/lib/ids_please/vkontakte.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Vkontakte < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /vk\.com|vkontakte/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
def parse(links)
|
8
|
-
links.map { |l| parse_link(l) }.compact
|
9
|
-
end
|
10
|
-
|
11
|
-
private
|
12
|
-
|
13
|
-
def parse_link(link)
|
14
|
-
if link.path =~ /id|club|public/
|
15
|
-
id = link.path.sub(/\A\/id|\A\/club|\A\/public/, '')
|
16
|
-
id.split(/[\/\?#]/).first
|
17
|
-
else
|
18
|
-
super
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|
24
|
-
end
|
data/lib/ids_please/youtube.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
class IdsPlease
|
2
|
-
class Youtube < IdsPlease::BaseParser
|
3
|
-
|
4
|
-
MASK = /youtu\.be|youtube/i
|
5
|
-
|
6
|
-
class << self
|
7
|
-
private
|
8
|
-
|
9
|
-
def parse_link(link)
|
10
|
-
if link.path =~ /channels|user/
|
11
|
-
link.path.split('/')[2]
|
12
|
-
else
|
13
|
-
link.path.split('/')[1]
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
end
|
19
|
-
end
|