ids_please 2.2.0 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +3 -3
- data/bin/ids_please +7 -0
- data/ids_please.gemspec +11 -9
- data/lib/ids_please/cli.rb +112 -0
- data/lib/ids_please/grabbers/base.rb +22 -2
- data/lib/ids_please/grabbers/facebook.rb +77 -24
- data/lib/ids_please/grabbers/google_plus.rb +67 -11
- data/lib/ids_please/grabbers/instagram.rb +85 -14
- data/lib/ids_please/grabbers/mailru.rb +4 -4
- data/lib/ids_please/grabbers/twitter.rb +106 -20
- data/lib/ids_please/grabbers/vkontakte.rb +5 -7
- data/lib/ids_please/grabbers.rb +3 -3
- data/lib/ids_please/parsers/base.rb +3 -5
- data/lib/ids_please/parsers/facebook.rb +0 -2
- data/lib/ids_please/parsers/google_plus.rb +0 -2
- data/lib/ids_please/parsers/hi5.rb +0 -2
- data/lib/ids_please/parsers/linkedin.rb +0 -2
- data/lib/ids_please/parsers/livejournal.rb +0 -2
- data/lib/ids_please/parsers/mailru.rb +0 -2
- data/lib/ids_please/parsers/moikrug.rb +0 -2
- data/lib/ids_please/parsers/odnoklassniki.rb +0 -2
- data/lib/ids_please/parsers/reddit.rb +0 -2
- data/lib/ids_please/parsers/tumblr.rb +0 -2
- data/lib/ids_please/parsers/twitter.rb +0 -2
- data/lib/ids_please/parsers/vkontakte.rb +0 -2
- data/lib/ids_please/parsers/youtube.rb +0 -2
- data/lib/ids_please/parsers.rb +4 -4
- data/lib/ids_please/version.rb +3 -1
- data/lib/ids_please.rb +15 -15
- data/spec/ids_please/basic_spec.rb +39 -46
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 728f2add26771adaaedbb2afb42fa59126e19a10
|
4
|
+
data.tar.gz: 56285e208b28c26303362a8070b3e538eecac37a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a26e536a30e7461fd9396df7d1db9806affb1c9212fdc4850eb853588a706b705c2b16c5f012cff8c7d04be30e88408ffbcea8f28967109d1ef7f81c1995eaec
|
7
|
+
data.tar.gz: d051a8a184798fc5d080f65db17bc78ce265e6030748923c3a6e6f0599154342ec1ec1de626ddb077c934c086015ea59d8d0b7d68c626c53113f26306e70a75d
|
data/Rakefile
CHANGED
@@ -4,9 +4,9 @@ require 'bundler'
|
|
4
4
|
Bundler::GemHelper.install_tasks
|
5
5
|
|
6
6
|
desc 'Run all tests by default'
|
7
|
-
task :
|
7
|
+
task default: :spec
|
8
8
|
|
9
9
|
require 'rspec/core/rake_task'
|
10
10
|
RSpec::Core::RakeTask.new do |t|
|
11
|
-
t.rspec_opts = [
|
12
|
-
end
|
11
|
+
t.rspec_opts = ['--color', '--format doc']
|
12
|
+
end
|
data/bin/ids_please
ADDED
data/ids_please.gemspec
CHANGED
@@ -4,17 +4,19 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'ids_please/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
|
-
s.name =
|
7
|
+
s.name = 'ids_please'
|
8
8
|
s.version = IdsPlease::VERSION
|
9
|
-
s.authors = [
|
10
|
-
s.description =
|
11
|
-
s.summary =
|
9
|
+
s.authors = ['gazay']
|
10
|
+
s.description = 'Helps to get ids or screen names from links to social network accounts'
|
11
|
+
s.summary = 'Helps to get ids or screen names from links to social network accounts'
|
12
12
|
s.licenses = ['MIT']
|
13
|
-
s.email =
|
14
|
-
s.extra_rdoc_files = [
|
15
|
-
s.rdoc_options = [
|
16
|
-
s.homepage =
|
17
|
-
s.require_paths = [
|
13
|
+
s.email = 'alex.gaziev@gmail.com'
|
14
|
+
s.extra_rdoc_files = ['LICENSE']
|
15
|
+
s.rdoc_options = ['--charset=UTF-8']
|
16
|
+
s.homepage = 'http://github.com/gazay/ids_please'
|
17
|
+
s.require_paths = ['lib']
|
18
|
+
s.bindir = 'bin'
|
19
|
+
s.executables = 'ids_please'
|
18
20
|
s.files = `git ls-files`.split("\n")
|
19
21
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
22
|
s.add_development_dependency 'rake'
|
@@ -0,0 +1,112 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module CLI
|
3
|
+
def self.run(args)
|
4
|
+
command = args.shift
|
5
|
+
case command
|
6
|
+
when 'grab', 'parse', 'recognize'
|
7
|
+
when 'help', nil
|
8
|
+
help
|
9
|
+
exit
|
10
|
+
else
|
11
|
+
abort "Unknown command. Enter 'ids_please help' for instructions"
|
12
|
+
end
|
13
|
+
|
14
|
+
links = args
|
15
|
+
if links.empty?
|
16
|
+
abort "You didn't enter any links. Enter 'ids_please help' for instructions"
|
17
|
+
end
|
18
|
+
|
19
|
+
ids = IdsPlease.new(*links)
|
20
|
+
case command
|
21
|
+
when 'grab'
|
22
|
+
grab(ids)
|
23
|
+
when 'parse'
|
24
|
+
parse(ids)
|
25
|
+
when 'recognize'
|
26
|
+
recognize(ids)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
module_function
|
31
|
+
|
32
|
+
def grab(ids)
|
33
|
+
ids.grab
|
34
|
+
ids.grabbed.each do |social_network, grabbers_array|
|
35
|
+
puts social_network.to_s.capitalize + ': '
|
36
|
+
grabbers_array.each do |grabber|
|
37
|
+
grabber.to_h.each do |property, value|
|
38
|
+
unless value.nil? || value.to_s.empty? || property == :page_source
|
39
|
+
|
40
|
+
if value.class == Hash
|
41
|
+
value.delete_if { |_, v| v.nil? }
|
42
|
+
unless value.empty?
|
43
|
+
puts " #{property}: "
|
44
|
+
value.each do |k, v|
|
45
|
+
puts " #{k}: #{v}"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
else
|
49
|
+
puts " #{property}: #{value}"
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
54
|
+
puts "\n" unless grabbers_array.last == grabber
|
55
|
+
end
|
56
|
+
puts "\n" unless ids.grabbed.to_a.last[0] == social_network
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def parse(ids)
|
61
|
+
ids.parse
|
62
|
+
ids.parsed.each do |social_network, permalinks_array|
|
63
|
+
puts social_network.to_s.capitalize + ': '
|
64
|
+
permalinks_array.each do |permalink|
|
65
|
+
puts " #{permalink}"
|
66
|
+
end
|
67
|
+
puts "\n" unless ids.parsed.to_a.last[0] == social_network
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def recognize(ids)
|
72
|
+
ids.recognize
|
73
|
+
unless ids.recognized.empty?
|
74
|
+
puts 'Recognized:'
|
75
|
+
ids.recognized.each do |social_network, urls_array|
|
76
|
+
puts " #{social_network.to_s.capitalize}: "
|
77
|
+
urls_array.each do |url|
|
78
|
+
puts " #{url}"
|
79
|
+
end
|
80
|
+
puts "\n"
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
unless ids.unrecognized.empty?
|
85
|
+
puts 'Unrecognized:'
|
86
|
+
ids.unrecognized.each do |url|
|
87
|
+
puts " #{url}"
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def help
|
93
|
+
puts <<-HELP.gsub(/^ {8}/, '')
|
94
|
+
IDs, please
|
95
|
+
Grab some hidden in html data from social account page
|
96
|
+
Get social network IDs or screen names from links to social network accounts
|
97
|
+
|
98
|
+
Usage:
|
99
|
+
ids_please command [links]
|
100
|
+
|
101
|
+
Available commands:
|
102
|
+
grab grab some hidden in html data from social account page (avatar, username, id...)
|
103
|
+
parse get screen names from links to social network accounts
|
104
|
+
recognize check that the link is for a known social network
|
105
|
+
|
106
|
+
Examples:
|
107
|
+
ids_please grab https://instagram.com/microsoft
|
108
|
+
ids_please parse https://facebook.com/Microsoft https://instagram.com/microsoft
|
109
|
+
HELP
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -8,13 +8,20 @@ class IdsPlease
|
|
8
8
|
links.map { |l| self.new(l).grab_link }
|
9
9
|
end
|
10
10
|
|
11
|
-
attr_reader :avatar,
|
11
|
+
attr_reader :avatar,
|
12
|
+
:display_name,
|
13
|
+
:username,
|
14
|
+
:link,
|
15
|
+
:page_source,
|
16
|
+
:network_id,
|
17
|
+
:data,
|
18
|
+
:counts
|
12
19
|
|
13
20
|
def initialize(link)
|
14
21
|
@link = link
|
15
22
|
end
|
16
23
|
|
17
|
-
def grab_link(
|
24
|
+
def grab_link(_link)
|
18
25
|
throw 'Base grabber can not grab anything'
|
19
26
|
end
|
20
27
|
|
@@ -48,6 +55,19 @@ class IdsPlease
|
|
48
55
|
def page_source
|
49
56
|
@page_source ||= open(link).read
|
50
57
|
end
|
58
|
+
|
59
|
+
def errors
|
60
|
+
@errors ||= []
|
61
|
+
end
|
62
|
+
|
63
|
+
def record_error(event, message)
|
64
|
+
errors << "#{event} has #{message}"
|
65
|
+
end
|
66
|
+
|
67
|
+
def find_by_regex(reg)
|
68
|
+
page_source.scan(reg).flatten.first
|
69
|
+
end
|
70
|
+
|
51
71
|
end
|
52
72
|
end
|
53
73
|
end
|
@@ -3,43 +3,96 @@ class IdsPlease
|
|
3
3
|
class Facebook < IdsPlease::Grabbers::Base
|
4
4
|
|
5
5
|
def grab_link
|
6
|
-
@network_id =
|
7
|
-
@avatar =
|
8
|
-
@display_name =
|
9
|
-
@username =
|
10
|
-
|
11
|
-
@display_name = CGI.unescapeHTML(@display_name.encode('utf-8')) if @display_name
|
12
|
-
@data = {}
|
13
|
-
{
|
14
|
-
type: page_source.scan(/og:type" content="([^"]+)"/).flatten.first.to_s.encode('utf-8'),
|
15
|
-
description: page_source.scan(/og:description" content="([^"]+)"/).flatten.first.to_s.encode('utf-8'),
|
16
|
-
}.each do |k, v|
|
17
|
-
next if v.nil? || v == ''
|
18
|
-
@data[k] = CGI.unescapeHTML(v).strip
|
19
|
-
end
|
6
|
+
@network_id = find_network_id
|
7
|
+
@avatar = find_avatar
|
8
|
+
@display_name = find_display_name
|
9
|
+
@username = find_username
|
10
|
+
|
20
11
|
@counts = {
|
21
|
-
likes:
|
22
|
-
visits:
|
23
|
-
}.delete_if {|
|
12
|
+
likes: find_likes,
|
13
|
+
visits: find_visits
|
14
|
+
}.delete_if { |_k, v| v.nil? }
|
15
|
+
|
16
|
+
@data = {
|
17
|
+
type: find_type,
|
18
|
+
description: find_description
|
19
|
+
}.delete_if { |_k, v| v.nil? }
|
20
|
+
|
24
21
|
self
|
25
22
|
rescue => e
|
26
|
-
|
23
|
+
record_error __method__, e.message
|
27
24
|
return self
|
28
25
|
end
|
29
26
|
|
30
|
-
|
31
|
-
|
27
|
+
private
|
28
|
+
|
29
|
+
def find_network_id
|
30
|
+
find_by_regex(/entity_id":"(\d+)"/)
|
31
|
+
rescue => e
|
32
|
+
record_error __method__, e.message
|
33
|
+
return nil
|
34
|
+
end
|
35
|
+
|
36
|
+
def find_avatar
|
37
|
+
CGI.unescapeHTML(
|
38
|
+
find_by_regex(/profilePic\simg"\salt=[^=]+="([^"]+)/).encode('utf-8')
|
39
|
+
)
|
40
|
+
rescue => e
|
41
|
+
record_error __method__, e.message
|
42
|
+
return nil
|
43
|
+
end
|
44
|
+
|
45
|
+
def find_display_name
|
46
|
+
CGI.unescapeHTML(
|
47
|
+
find_by_regex(/pageTitle">([^<\|]+)/).strip.encode('utf-8')
|
48
|
+
)
|
49
|
+
rescue => e
|
50
|
+
record_error __method__, e.message
|
51
|
+
return nil
|
52
|
+
end
|
53
|
+
|
54
|
+
def find_username
|
55
|
+
find_by_regex(/link\srel="canonical"\shref="https:\/\/facebook\.com\/([^"]+)/) ||
|
56
|
+
find_by_regex(/;\sURL=\/([^\/\?]+)/)
|
57
|
+
rescue => e
|
58
|
+
record_error __method__, e.message
|
59
|
+
return nil
|
60
|
+
end
|
61
|
+
|
62
|
+
def find_type
|
63
|
+
find_by_regex(/type":"Person/) ? 'perosnal' : 'group'
|
64
|
+
rescue => e
|
65
|
+
record_error __method__, e.message
|
66
|
+
return nil
|
67
|
+
end
|
68
|
+
|
69
|
+
def find_description
|
70
|
+
CGI.unescapeHTML(
|
71
|
+
find_by_regex(/name="description" content="([^"]+)"/).encode('utf-8')
|
72
|
+
).strip
|
73
|
+
rescue => e
|
74
|
+
record_error __method__, e.message
|
75
|
+
return nil
|
76
|
+
end
|
77
|
+
|
78
|
+
def find_likes
|
79
|
+
if likes = find_by_regex(/>([^"]+) <span class=".+">likes/)
|
80
|
+
likes.tr(',', '').to_i
|
81
|
+
end
|
32
82
|
rescue => e
|
33
|
-
|
83
|
+
record_error __method__, e.message
|
34
84
|
return nil
|
35
85
|
end
|
36
86
|
|
37
|
-
def
|
38
|
-
|
87
|
+
def find_visits
|
88
|
+
if visits = find_by_regex(/likes.+>([^"]+)<\/span> <span class=".+">visits/)
|
89
|
+
visits.tr(',', '').to_i
|
90
|
+
end
|
39
91
|
rescue => e
|
40
|
-
|
92
|
+
record_error __method__, e.message
|
41
93
|
return nil
|
42
94
|
end
|
95
|
+
|
43
96
|
end
|
44
97
|
end
|
45
98
|
end
|
@@ -3,23 +3,79 @@ class IdsPlease
|
|
3
3
|
class GooglePlus < IdsPlease::Grabbers::Base
|
4
4
|
|
5
5
|
def grab_link
|
6
|
-
@network_id =
|
7
|
-
@avatar =
|
8
|
-
@display_name =
|
9
|
-
@username =
|
10
|
-
|
11
|
-
description: page_source.scan(/name="Description" content="([^"]+)">/).flatten.first.to_s.encode('utf-8')
|
12
|
-
}
|
6
|
+
@network_id = find_network_id
|
7
|
+
@avatar = find_avatar
|
8
|
+
@display_name = find_display_name
|
9
|
+
@username = find_username
|
10
|
+
|
13
11
|
@counts = {
|
14
|
-
followers:
|
15
|
-
views:
|
16
|
-
}
|
12
|
+
followers: find_followers,
|
13
|
+
views: find_views
|
14
|
+
}.delete_if { |_k, v| v.nil? }
|
15
|
+
|
16
|
+
@data = {
|
17
|
+
description: find_description
|
18
|
+
}.delete_if { |_k, v| v.nil? }
|
19
|
+
|
17
20
|
self
|
18
21
|
rescue => e
|
19
|
-
|
22
|
+
record_error __method__, e.message
|
20
23
|
return self
|
21
24
|
end
|
22
25
|
|
26
|
+
def find_network_id
|
27
|
+
find_by_regex(/oid="(\d+)"/)
|
28
|
+
rescue => e
|
29
|
+
record_error __method__, e.message
|
30
|
+
return nil
|
31
|
+
end
|
32
|
+
|
33
|
+
def find_avatar
|
34
|
+
"https:#{find_by_regex(/guidedhelpid="profile_photo"><img src="([^"]+)"/)}"
|
35
|
+
rescue => e
|
36
|
+
record_error __method__, e.message
|
37
|
+
return nil
|
38
|
+
end
|
39
|
+
|
40
|
+
def find_display_name
|
41
|
+
find_by_regex(/og:title" content="([^"]+)"/).gsub(' - Google+', '')
|
42
|
+
rescue => e
|
43
|
+
record_error __method__, e.message
|
44
|
+
return nil
|
45
|
+
end
|
46
|
+
|
47
|
+
def find_username
|
48
|
+
"+#{find_by_regex(/"https:\/\/plus.google.com\/\+(.+?)"/)}"
|
49
|
+
rescue => e
|
50
|
+
record_error __method__, e.message
|
51
|
+
return nil
|
52
|
+
end
|
53
|
+
|
54
|
+
def find_description
|
55
|
+
find_by_regex(/name="Description" content="([^"]+)">/).encode('utf-8')
|
56
|
+
rescue => e
|
57
|
+
record_error __method__, e.message
|
58
|
+
return nil
|
59
|
+
end
|
60
|
+
|
61
|
+
def find_followers
|
62
|
+
if followers = find_by_regex(/">([^"]+)<\/span> followers</)
|
63
|
+
followers.tr(',', '').to_i
|
64
|
+
end
|
65
|
+
rescue => e
|
66
|
+
record_error __method__, e.message
|
67
|
+
return nil
|
68
|
+
end
|
69
|
+
|
70
|
+
def find_views
|
71
|
+
if views = find_by_regex(/">([^"]+)<\/span> views</)
|
72
|
+
views.tr(',', '').to_i
|
73
|
+
end
|
74
|
+
rescue => e
|
75
|
+
record_error __method__, e.message
|
76
|
+
return nil
|
77
|
+
end
|
78
|
+
|
23
79
|
end
|
24
80
|
end
|
25
81
|
end
|
@@ -5,25 +5,96 @@ class IdsPlease
|
|
5
5
|
class Instagram < IdsPlease::Grabbers::Base
|
6
6
|
|
7
7
|
def grab_link
|
8
|
-
@network_id =
|
9
|
-
@avatar =
|
10
|
-
@display_name =
|
11
|
-
@username =
|
12
|
-
|
13
|
-
bio: page_source.scan(/"biography":"([^"]+)"/).flatten.first,
|
14
|
-
website: page_source.scan(/"user":{.+"external_url":"([^"]+)"/).flatten.first.gsub('\\', ''),
|
15
|
-
}
|
8
|
+
@network_id = find_network_id
|
9
|
+
@avatar = find_avatar
|
10
|
+
@display_name = find_display_name
|
11
|
+
@username = find_username
|
12
|
+
|
16
13
|
@counts = {
|
17
|
-
media:
|
18
|
-
followed_by:
|
19
|
-
follows:
|
20
|
-
}
|
21
|
-
|
14
|
+
media: find_media,
|
15
|
+
followed_by: find_followed_by,
|
16
|
+
follows: find_follows
|
17
|
+
}.delete_if { |_k, v| v.nil? }
|
18
|
+
|
19
|
+
@data = {
|
20
|
+
bio: find_bio,
|
21
|
+
website: find_website
|
22
|
+
}.delete_if { |_k, v| v.nil? }
|
23
|
+
|
22
24
|
self
|
23
25
|
rescue => e
|
24
|
-
|
26
|
+
record_error __method__, e.message
|
25
27
|
return self
|
26
28
|
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def find_network_id
|
33
|
+
find_by_regex(/"user":{.+"id":"(\d+)"/)
|
34
|
+
rescue => e
|
35
|
+
record_error __method__, e.message
|
36
|
+
return nil
|
37
|
+
end
|
38
|
+
|
39
|
+
def find_avatar
|
40
|
+
find_by_regex(/"user":{.+"profile_pic_url":"([^"]+)"/).gsub('\\', '')
|
41
|
+
rescue => e
|
42
|
+
record_error __method__, e.message
|
43
|
+
return nil
|
44
|
+
end
|
45
|
+
|
46
|
+
def find_display_name
|
47
|
+
_display_name = find_by_regex(/"user":{.+"full_name":"([^"]+)"/)
|
48
|
+
_display_name.gsub(/\\u([\da-fA-F]{4})/) { |_m|
|
49
|
+
[Regexp.last_match(1)].pack('H*').unpack('n*').pack('U*')
|
50
|
+
}
|
51
|
+
rescue => e
|
52
|
+
record_error __method__, e.message
|
53
|
+
return nil
|
54
|
+
end
|
55
|
+
|
56
|
+
def find_username
|
57
|
+
find_by_regex(/"user":{"username":"([^"]+)"/).gsub('\\', '')
|
58
|
+
rescue => e
|
59
|
+
record_error __method__, e.message
|
60
|
+
return nil
|
61
|
+
end
|
62
|
+
|
63
|
+
def find_bio
|
64
|
+
CGI.unescapeHTML(find_by_regex(/"biography":"([^"]+)"/)).strip
|
65
|
+
rescue => e
|
66
|
+
record_error __method__, e.message
|
67
|
+
return nil
|
68
|
+
end
|
69
|
+
|
70
|
+
def find_website
|
71
|
+
CGI.unescapeHTML(find_by_regex(/"user":{.+"external_url":"([^"]+)"/).gsub('\\', '')).strip
|
72
|
+
rescue => e
|
73
|
+
record_error __method__, e.message
|
74
|
+
return nil
|
75
|
+
end
|
76
|
+
|
77
|
+
def find_media
|
78
|
+
find_by_regex(/"media":{"count":(\d+)/).to_i
|
79
|
+
rescue => e
|
80
|
+
record_error __method__, e.message
|
81
|
+
return nil
|
82
|
+
end
|
83
|
+
|
84
|
+
def find_followed_by
|
85
|
+
find_by_regex(/"followed_by":{"count":(\d+)/).to_i
|
86
|
+
rescue => e
|
87
|
+
record_error __method__, e.message
|
88
|
+
return nil
|
89
|
+
end
|
90
|
+
|
91
|
+
def find_follows
|
92
|
+
find_by_regex(/"follows":{"count":(\d+)/).to_i
|
93
|
+
rescue => e
|
94
|
+
record_error __method__, e.message
|
95
|
+
return nil
|
96
|
+
end
|
97
|
+
|
27
98
|
end
|
28
99
|
end
|
29
100
|
end
|
@@ -9,17 +9,17 @@ class IdsPlease
|
|
9
9
|
uid_url = "http://appsmail.ru/platform/#{link.split('/')[-2..-1].join('/')}"
|
10
10
|
@network_id = JSON.parse(open(uid_url).read)['uid']
|
11
11
|
@username, type = get_name_and_type(link)
|
12
|
-
@avatar =
|
13
|
-
@display_name =
|
12
|
+
@avatar = find_by_regex(/profile__avatar" src="([^"]+)/)
|
13
|
+
@display_name = find_by_regex(/h1.+title="([^"]+)/)
|
14
14
|
@display_name = CGI.unescapeHTML(@display_name) if @display_name
|
15
15
|
@data = {
|
16
16
|
type: type,
|
17
|
-
description:
|
17
|
+
description: find_by_regex(/profile__content_mainInfo" title="([^"]+)/)
|
18
18
|
}
|
19
19
|
@data[:description] = CGI.unescapeHTML(@data[:description]) if @data[:description]
|
20
20
|
self
|
21
21
|
rescue => e
|
22
|
-
|
22
|
+
record_error __method__, e.message
|
23
23
|
return self
|
24
24
|
end
|
25
25
|
|
@@ -3,32 +3,118 @@ class IdsPlease
|
|
3
3
|
class Twitter < IdsPlease::Grabbers::Base
|
4
4
|
|
5
5
|
def grab_link
|
6
|
-
@network_id =
|
7
|
-
@avatar =
|
8
|
-
@display_name =
|
9
|
-
@username =
|
10
|
-
|
11
|
-
{
|
12
|
-
description: page_source.scan(/ProfileHeaderCard-bio[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
|
13
|
-
location: page_source.scan(/ProfileHeaderCard-locationText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
|
14
|
-
join_date: page_source.scan(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
|
15
|
-
}.each do |k, v|
|
16
|
-
next if v.nil? || v == ''
|
17
|
-
@data[k] = CGI.unescapeHTML(v).strip
|
18
|
-
end
|
6
|
+
@network_id = find_network_id
|
7
|
+
@avatar = find_avatar
|
8
|
+
@display_name = find_display_name
|
9
|
+
@username = find_username
|
10
|
+
|
19
11
|
@counts = {
|
20
|
-
tweets:
|
21
|
-
following:
|
22
|
-
followers:
|
23
|
-
favorites:
|
24
|
-
lists:
|
25
|
-
}
|
12
|
+
tweets: find_tweets,
|
13
|
+
following: find_following,
|
14
|
+
followers: find_followers,
|
15
|
+
favorites: find_favorites,
|
16
|
+
lists: find_listed
|
17
|
+
}.delete_if { |_k, v| v.nil? }
|
18
|
+
|
19
|
+
@data = {
|
20
|
+
description: find_description,
|
21
|
+
location: find_location,
|
22
|
+
join_date: find_join_date
|
23
|
+
}.delete_if { |_k, v| v.nil? }
|
24
|
+
|
26
25
|
self
|
27
26
|
rescue => e
|
28
|
-
|
27
|
+
record_error __method__, e.message
|
29
28
|
return self
|
30
29
|
end
|
31
30
|
|
31
|
+
def find_network_id
|
32
|
+
find_by_regex(/data-user-id="(\d+)"/)
|
33
|
+
rescue => e
|
34
|
+
record_error __method__, e.message
|
35
|
+
return nil
|
36
|
+
end
|
37
|
+
|
38
|
+
def find_avatar
|
39
|
+
find_by_regex(/ProfileAvatar-image " src="([^"]+)"/)
|
40
|
+
rescue => e
|
41
|
+
record_error __method__, e.message
|
42
|
+
return nil
|
43
|
+
end
|
44
|
+
|
45
|
+
def find_display_name
|
46
|
+
find_by_regex(/ProfileHeaderCard-nameLink[^>]+>([^<]+)</)
|
47
|
+
rescue => e
|
48
|
+
record_error __method__, e.message
|
49
|
+
return nil
|
50
|
+
end
|
51
|
+
|
52
|
+
def find_username
|
53
|
+
find_by_regex(/<title>[^\(]+\(@([^\)]+)\)/)
|
54
|
+
rescue => e
|
55
|
+
record_error __method__, e.message
|
56
|
+
return nil
|
57
|
+
end
|
58
|
+
|
59
|
+
def find_description
|
60
|
+
_desc = find_by_regex(/ProfileHeaderCard-bio[^>]+>([^<]+)</)
|
61
|
+
CGI.unescapeHTML(_desc.encode('utf-8')).strip
|
62
|
+
rescue => e
|
63
|
+
record_error __method__, e.message
|
64
|
+
return nil
|
65
|
+
end
|
66
|
+
|
67
|
+
def find_location
|
68
|
+
_loc = find_by_regex(/ProfileHeaderCard-locationText[^>]+>([^<]+)</)
|
69
|
+
CGI.unescapeHTML(_loc.encode('utf-8')).strip
|
70
|
+
rescue => e
|
71
|
+
record_error __method__, e.message
|
72
|
+
return nil
|
73
|
+
end
|
74
|
+
|
75
|
+
def find_join_date
|
76
|
+
_date = find_by_regex(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</)
|
77
|
+
CGI.unescapeHTML(_date.encode('utf-8')).strip
|
78
|
+
rescue => e
|
79
|
+
record_error __method__, e.message
|
80
|
+
return nil
|
81
|
+
end
|
82
|
+
|
83
|
+
def find_tweets
|
84
|
+
find_by_regex(/statuses_count":(\d+),"/).to_i
|
85
|
+
rescue => e
|
86
|
+
record_error __method__, e.message
|
87
|
+
return nil
|
88
|
+
end
|
89
|
+
|
90
|
+
def find_followers
|
91
|
+
find_by_regex(/followers_count":(\d+),"/).to_i
|
92
|
+
rescue => e
|
93
|
+
record_error __method__, e.message
|
94
|
+
return nil
|
95
|
+
end
|
96
|
+
|
97
|
+
def find_following
|
98
|
+
find_by_regex(/friends_count":(\d+),"/).to_i
|
99
|
+
rescue => e
|
100
|
+
record_error __method__, e.message
|
101
|
+
return nil
|
102
|
+
end
|
103
|
+
|
104
|
+
def find_favorites
|
105
|
+
find_by_regex(/favourites_count":(\d+),"/).to_i
|
106
|
+
rescue => e
|
107
|
+
record_error __method__, e.message
|
108
|
+
return nil
|
109
|
+
end
|
110
|
+
|
111
|
+
def find_listed
|
112
|
+
find_by_regex(/listed_count":(\d+),"/).to_i
|
113
|
+
rescue => e
|
114
|
+
record_error __method__, e.message
|
115
|
+
return nil
|
116
|
+
end
|
117
|
+
|
32
118
|
end
|
33
119
|
end
|
34
120
|
end
|
@@ -1,26 +1,24 @@
|
|
1
1
|
class IdsPlease
|
2
2
|
module Grabbers
|
3
3
|
class Vkontakte < IdsPlease::Grabbers::Base
|
4
|
-
|
5
4
|
def grab_link
|
6
5
|
agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36'
|
7
6
|
@page_source ||= open(link, 'User-Agent' => agent).read.encode('utf-8')
|
8
|
-
@network_id =
|
7
|
+
@network_id = find_by_regex(/href="\/wall(-\d+)_/)
|
9
8
|
@username = @link.to_s.split('vk.com/').last.gsub('/', '')
|
10
|
-
@avatar =
|
9
|
+
@avatar = find_by_regex(/page_avatar.+\n.+src="([^"]+)/)
|
11
10
|
@avatar = CGI.unescapeHTML(@avatar) if @avatar
|
12
|
-
@display_name =
|
11
|
+
@display_name = find_by_regex(/page_name">([^<]+)/)
|
13
12
|
@display_name = CGI.unescapeHTML(@display_name) if @display_name
|
14
13
|
@data = {
|
15
|
-
description:
|
14
|
+
description: find_by_regex(/description" content="([^"]+)/)
|
16
15
|
}
|
17
16
|
@data[:description] = CGI.unescapeHTML(@data[:description]) if @data[:description]
|
18
17
|
self
|
19
18
|
rescue => e
|
20
|
-
|
19
|
+
record_error __method__, e.message
|
21
20
|
return self
|
22
21
|
end
|
23
|
-
|
24
22
|
end
|
25
23
|
end
|
26
24
|
end
|
data/lib/ids_please/grabbers.rb
CHANGED
@@ -8,14 +8,14 @@ require_relative 'grabbers/google_plus'
|
|
8
8
|
|
9
9
|
class IdsPlease
|
10
10
|
module Grabbers
|
11
|
-
|
11
|
+
|
12
12
|
NETWORKS = {
|
13
13
|
facebook: IdsPlease::Grabbers::Facebook,
|
14
14
|
vkontakte: IdsPlease::Grabbers::Vkontakte,
|
15
15
|
twitter: IdsPlease::Grabbers::Twitter,
|
16
16
|
instagram: IdsPlease::Grabbers::Instagram,
|
17
17
|
mailru: IdsPlease::Grabbers::Mailru,
|
18
|
-
google_plus: IdsPlease::Grabbers::GooglePlus
|
18
|
+
google_plus: IdsPlease::Grabbers::GooglePlus
|
19
19
|
}
|
20
20
|
|
21
21
|
def self.each
|
@@ -25,6 +25,6 @@ class IdsPlease
|
|
25
25
|
def self.by_symbol(sym)
|
26
26
|
NETWORKS[sym]
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
end
|
30
30
|
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
class IdsPlease
|
2
2
|
module Parsers
|
3
3
|
class Base
|
4
|
-
|
5
4
|
class << self
|
5
|
+
|
6
6
|
def to_sym
|
7
|
-
|
7
|
+
name.split('::').last.downcase.to_sym
|
8
8
|
end
|
9
9
|
|
10
10
|
def interact(links)
|
@@ -15,8 +15,6 @@ class IdsPlease
|
|
15
15
|
end.compact
|
16
16
|
end
|
17
17
|
|
18
|
-
private
|
19
|
-
|
20
18
|
def parse_link(link)
|
21
19
|
link.path.split('/')[1]
|
22
20
|
end
|
@@ -24,8 +22,8 @@ class IdsPlease
|
|
24
22
|
def valid_id_regex
|
25
23
|
/\A([\w\.\+-]{2,})/
|
26
24
|
end
|
27
|
-
end
|
28
25
|
|
26
|
+
end
|
29
27
|
end
|
30
28
|
end
|
31
29
|
end
|
data/lib/ids_please/parsers.rb
CHANGED
@@ -44,13 +44,13 @@ class IdsPlease
|
|
44
44
|
mailru: IdsPlease::Parsers::Mailru
|
45
45
|
}
|
46
46
|
|
47
|
-
def self.each
|
48
|
-
NETWORKS.values.each { |n| yield n }
|
49
|
-
end
|
50
|
-
|
51
47
|
def self.by_symbol(sym)
|
52
48
|
NETWORKS[sym]
|
53
49
|
end
|
54
50
|
|
51
|
+
def self.to_a
|
52
|
+
NETWORKS.values
|
53
|
+
end
|
54
|
+
|
55
55
|
end
|
56
56
|
end
|
data/lib/ids_please/version.rb
CHANGED
data/lib/ids_please.rb
CHANGED
@@ -26,7 +26,7 @@ class IdsPlease
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def recognized
|
29
|
-
Hash[@recognized.map { |parser, links| [
|
29
|
+
Hash[@recognized.map { |parser, links| [parser.to_sym, links] }]
|
30
30
|
end
|
31
31
|
|
32
32
|
def parse
|
@@ -39,22 +39,22 @@ class IdsPlease
|
|
39
39
|
|
40
40
|
private
|
41
41
|
|
42
|
-
def interact(
|
42
|
+
def interact(interactors_group = :parsers)
|
43
43
|
recognize
|
44
44
|
interacted = Hash.new { |hash, network| hash[network] = [] }
|
45
45
|
@recognized.each do |network, links|
|
46
|
-
interactor = IdsPlease.send(
|
46
|
+
interactor = IdsPlease.send(interactors_group).by_symbol(network)
|
47
47
|
interacted[network].concat interactor.interact(links)
|
48
48
|
end
|
49
|
-
|
49
|
+
instance_variable_set(interacted_var(interactors_group), interacted)
|
50
50
|
|
51
51
|
interacted
|
52
52
|
end
|
53
53
|
|
54
|
-
def interacted_var(
|
55
|
-
if
|
54
|
+
def interacted_var(interactors_group)
|
55
|
+
if interactors_group == :parsers
|
56
56
|
:@parsed
|
57
|
-
elsif
|
57
|
+
elsif interactors_group == :grabbers
|
58
58
|
:@grabbed
|
59
59
|
else
|
60
60
|
throw 'Wrong interactors type'
|
@@ -64,14 +64,14 @@ class IdsPlease
|
|
64
64
|
def recognize_link(link)
|
65
65
|
link = "http://#{link}" unless link =~ /\Ahttps?:\/\//
|
66
66
|
parsed_link = URI(URI.encode(link))
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
67
|
+
|
68
|
+
network = IdsPlease::Parsers.to_a.find { |n| parsed_link.host =~ n::MASK }
|
69
|
+
|
70
|
+
if network
|
71
|
+
@recognized[network.to_sym] ||= []
|
72
|
+
@recognized[network.to_sym] << parsed_link
|
73
|
+
else
|
74
|
+
@unrecognized << link
|
73
75
|
end
|
74
|
-
unrecognized << link
|
75
76
|
end
|
76
|
-
|
77
77
|
end
|
@@ -1,51 +1,49 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe IdsPlease do
|
4
|
-
|
5
4
|
recognazible_links = %w(
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
5
|
+
https://www.facebook.com/fb_acc
|
6
|
+
https://facebook.com/fb_acc2<U+200>
|
7
|
+
http://instagram.com/inst_acc
|
8
|
+
http://hi5.com/hi5_acc
|
9
|
+
http://www.hi5.com/profile.html?uid=12341234
|
10
|
+
http://pinterest.com/pinterest_acc
|
11
|
+
http://blogger-acc.blogspot.com
|
12
|
+
http://livejournal-acc.livejournal.com
|
13
|
+
http://livejournal-acc2.livejournal.ru
|
14
|
+
https://www.blogger.com/blogger.g?blogID=12341234#overview/src=dashboard
|
15
|
+
http://vk.com/vk_acc
|
16
|
+
http://linkedin.com/in/xnutsive
|
17
|
+
http://www.linkedin.com/company/evil-martians
|
18
|
+
http://www.linkedin.com/profile/view?id=12341234&trk=nav_responsive_tab_profile
|
19
|
+
http://Ameblo.jp/ameba_acc
|
20
|
+
http://reddit.com/user/reddit_acc
|
21
|
+
https://twitter.com/twi_acc
|
22
|
+
https://vimeo.com/vimeo_acc
|
23
|
+
https://plus.google.com/12341234
|
24
|
+
https://plus.google.com/+VladimirBokov
|
25
|
+
https://soundcloud.com/sc_acc
|
26
|
+
https://youtube.com/channels/yb_acc
|
27
|
+
http://tumblr-acc.tumblr.com
|
28
|
+
http://odnoklassniki.com/profile/12341234/about
|
29
|
+
http://ok.ru/profile/12341234/about
|
30
|
+
http://odnoklassniki.com/group/43214321/about?some=123
|
31
|
+
http://moikrug-acc.moikrug.ru
|
32
|
+
https://my.mail.ru/community/test-group-102/
|
33
|
+
https://my.mail.ru/mail/gazay/
|
34
|
+
)
|
36
35
|
|
37
36
|
not_recognazible_links = %w(
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
http://fucebook.com/not_recognized
|
38
|
+
http://vka.com/not_recognized
|
39
|
+
)
|
41
40
|
|
42
41
|
not_parseble_links = %w(
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
http://vk.com
|
43
|
+
http://soundcloud.com
|
44
|
+
)
|
46
45
|
|
47
46
|
describe 'recognize' do
|
48
|
-
|
49
47
|
it 'not recognizes wrong links' do
|
50
48
|
recognizer = IdsPlease.new(*not_recognazible_links)
|
51
49
|
recognizer.recognize
|
@@ -138,13 +136,10 @@ describe IdsPlease do
|
|
138
136
|
it 'recognizes mailru links' do
|
139
137
|
expect(@recognizer.recognized[:mailru].count).to eq(2)
|
140
138
|
end
|
141
|
-
|
142
139
|
end
|
143
|
-
|
144
140
|
end
|
145
141
|
|
146
142
|
describe 'parse' do
|
147
|
-
|
148
143
|
it 'not parse wrong links' do
|
149
144
|
@recognizer = IdsPlease.new(*not_parseble_links)
|
150
145
|
@recognizer.parse
|
@@ -170,7 +165,7 @@ describe IdsPlease do
|
|
170
165
|
end
|
171
166
|
|
172
167
|
it 'get right id from facebook link' do
|
173
|
-
expect(@recognizer.parsed[:facebook]).to eq(
|
168
|
+
expect(@recognizer.parsed[:facebook]).to eq(%w(fb_acc fb_acc2))
|
174
169
|
end
|
175
170
|
|
176
171
|
it 'get right id from linkedin link' do
|
@@ -210,7 +205,7 @@ describe IdsPlease do
|
|
210
205
|
end
|
211
206
|
|
212
207
|
it 'get right id from hi5 link' do
|
213
|
-
expect(@recognizer.parsed[:hi5]).to eq(
|
208
|
+
expect(@recognizer.parsed[:hi5]).to eq(%w(hi5_acc 12341234))
|
214
209
|
end
|
215
210
|
|
216
211
|
it 'get right id from soundcloud link' do
|
@@ -234,14 +229,12 @@ describe IdsPlease do
|
|
234
229
|
end
|
235
230
|
|
236
231
|
it 'get right id from odnoklassniki link' do
|
237
|
-
expect(@recognizer.parsed[:odnoklassniki].sort).to eq(
|
232
|
+
expect(@recognizer.parsed[:odnoklassniki].sort).to eq(%w(12341234 43214321 12341234).sort)
|
238
233
|
end
|
239
234
|
|
240
235
|
it 'get right id from moikrug link' do
|
241
236
|
expect(@recognizer.parsed[:moikrug].first).to eq('moikrug-acc')
|
242
237
|
end
|
243
|
-
|
244
238
|
end
|
245
239
|
end
|
246
|
-
|
247
240
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ids_please
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.
|
4
|
+
version: 2.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- gazay
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -40,7 +40,8 @@ dependencies:
|
|
40
40
|
version: '0'
|
41
41
|
description: Helps to get ids or screen names from links to social network accounts
|
42
42
|
email: alex.gaziev@gmail.com
|
43
|
-
executables:
|
43
|
+
executables:
|
44
|
+
- ids_please
|
44
45
|
extensions: []
|
45
46
|
extra_rdoc_files:
|
46
47
|
- LICENSE
|
@@ -51,8 +52,10 @@ files:
|
|
51
52
|
- LICENSE
|
52
53
|
- README.md
|
53
54
|
- Rakefile
|
55
|
+
- bin/ids_please
|
54
56
|
- ids_please.gemspec
|
55
57
|
- lib/ids_please.rb
|
58
|
+
- lib/ids_please/cli.rb
|
56
59
|
- lib/ids_please/grabbers.rb
|
57
60
|
- lib/ids_please/grabbers/base.rb
|
58
61
|
- lib/ids_please/grabbers/facebook.rb
|
@@ -106,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
109
|
version: '0'
|
107
110
|
requirements: []
|
108
111
|
rubyforge_project:
|
109
|
-
rubygems_version: 2.4.5
|
112
|
+
rubygems_version: 2.4.5.1
|
110
113
|
signing_key:
|
111
114
|
specification_version: 4
|
112
115
|
summary: Helps to get ids or screen names from links to social network accounts
|