ids_please 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +3 -3
- data/bin/ids_please +7 -0
- data/ids_please.gemspec +11 -9
- data/lib/ids_please/cli.rb +112 -0
- data/lib/ids_please/grabbers/base.rb +22 -2
- data/lib/ids_please/grabbers/facebook.rb +77 -24
- data/lib/ids_please/grabbers/google_plus.rb +67 -11
- data/lib/ids_please/grabbers/instagram.rb +85 -14
- data/lib/ids_please/grabbers/mailru.rb +4 -4
- data/lib/ids_please/grabbers/twitter.rb +106 -20
- data/lib/ids_please/grabbers/vkontakte.rb +5 -7
- data/lib/ids_please/grabbers.rb +3 -3
- data/lib/ids_please/parsers/base.rb +3 -5
- data/lib/ids_please/parsers/facebook.rb +0 -2
- data/lib/ids_please/parsers/google_plus.rb +0 -2
- data/lib/ids_please/parsers/hi5.rb +0 -2
- data/lib/ids_please/parsers/linkedin.rb +0 -2
- data/lib/ids_please/parsers/livejournal.rb +0 -2
- data/lib/ids_please/parsers/mailru.rb +0 -2
- data/lib/ids_please/parsers/moikrug.rb +0 -2
- data/lib/ids_please/parsers/odnoklassniki.rb +0 -2
- data/lib/ids_please/parsers/reddit.rb +0 -2
- data/lib/ids_please/parsers/tumblr.rb +0 -2
- data/lib/ids_please/parsers/twitter.rb +0 -2
- data/lib/ids_please/parsers/vkontakte.rb +0 -2
- data/lib/ids_please/parsers/youtube.rb +0 -2
- data/lib/ids_please/parsers.rb +4 -4
- data/lib/ids_please/version.rb +3 -1
- data/lib/ids_please.rb +15 -15
- data/spec/ids_please/basic_spec.rb +39 -46
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 728f2add26771adaaedbb2afb42fa59126e19a10
|
4
|
+
data.tar.gz: 56285e208b28c26303362a8070b3e538eecac37a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a26e536a30e7461fd9396df7d1db9806affb1c9212fdc4850eb853588a706b705c2b16c5f012cff8c7d04be30e88408ffbcea8f28967109d1ef7f81c1995eaec
|
7
|
+
data.tar.gz: d051a8a184798fc5d080f65db17bc78ce265e6030748923c3a6e6f0599154342ec1ec1de626ddb077c934c086015ea59d8d0b7d68c626c53113f26306e70a75d
|
data/Rakefile
CHANGED
@@ -4,9 +4,9 @@ require 'bundler'
|
|
4
4
|
Bundler::GemHelper.install_tasks
|
5
5
|
|
6
6
|
desc 'Run all tests by default'
|
7
|
-
task :
|
7
|
+
task default: :spec
|
8
8
|
|
9
9
|
require 'rspec/core/rake_task'
|
10
10
|
RSpec::Core::RakeTask.new do |t|
|
11
|
-
t.rspec_opts = [
|
12
|
-
end
|
11
|
+
t.rspec_opts = ['--color', '--format doc']
|
12
|
+
end
|
data/bin/ids_please
ADDED
data/ids_please.gemspec
CHANGED
@@ -4,17 +4,19 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'ids_please/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
|
-
s.name =
|
7
|
+
s.name = 'ids_please'
|
8
8
|
s.version = IdsPlease::VERSION
|
9
|
-
s.authors = [
|
10
|
-
s.description =
|
11
|
-
s.summary =
|
9
|
+
s.authors = ['gazay']
|
10
|
+
s.description = 'Helps to get ids or screen names from links to social network accounts'
|
11
|
+
s.summary = 'Helps to get ids or screen names from links to social network accounts'
|
12
12
|
s.licenses = ['MIT']
|
13
|
-
s.email =
|
14
|
-
s.extra_rdoc_files = [
|
15
|
-
s.rdoc_options = [
|
16
|
-
s.homepage =
|
17
|
-
s.require_paths = [
|
13
|
+
s.email = 'alex.gaziev@gmail.com'
|
14
|
+
s.extra_rdoc_files = ['LICENSE']
|
15
|
+
s.rdoc_options = ['--charset=UTF-8']
|
16
|
+
s.homepage = 'http://github.com/gazay/ids_please'
|
17
|
+
s.require_paths = ['lib']
|
18
|
+
s.bindir = 'bin'
|
19
|
+
s.executables = 'ids_please'
|
18
20
|
s.files = `git ls-files`.split("\n")
|
19
21
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
22
|
s.add_development_dependency 'rake'
|
@@ -0,0 +1,112 @@
|
|
1
|
+
class IdsPlease
|
2
|
+
module CLI
|
3
|
+
def self.run(args)
|
4
|
+
command = args.shift
|
5
|
+
case command
|
6
|
+
when 'grab', 'parse', 'recognize'
|
7
|
+
when 'help', nil
|
8
|
+
help
|
9
|
+
exit
|
10
|
+
else
|
11
|
+
abort "Unknown command. Enter 'ids_please help' for instructions"
|
12
|
+
end
|
13
|
+
|
14
|
+
links = args
|
15
|
+
if links.empty?
|
16
|
+
abort "You didn't enter any links. Enter 'ids_please help' for instructions"
|
17
|
+
end
|
18
|
+
|
19
|
+
ids = IdsPlease.new(*links)
|
20
|
+
case command
|
21
|
+
when 'grab'
|
22
|
+
grab(ids)
|
23
|
+
when 'parse'
|
24
|
+
parse(ids)
|
25
|
+
when 'recognize'
|
26
|
+
recognize(ids)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
module_function
|
31
|
+
|
32
|
+
def grab(ids)
|
33
|
+
ids.grab
|
34
|
+
ids.grabbed.each do |social_network, grabbers_array|
|
35
|
+
puts social_network.to_s.capitalize + ': '
|
36
|
+
grabbers_array.each do |grabber|
|
37
|
+
grabber.to_h.each do |property, value|
|
38
|
+
unless value.nil? || value.to_s.empty? || property == :page_source
|
39
|
+
|
40
|
+
if value.class == Hash
|
41
|
+
value.delete_if { |_, v| v.nil? }
|
42
|
+
unless value.empty?
|
43
|
+
puts " #{property}: "
|
44
|
+
value.each do |k, v|
|
45
|
+
puts " #{k}: #{v}"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
else
|
49
|
+
puts " #{property}: #{value}"
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
54
|
+
puts "\n" unless grabbers_array.last == grabber
|
55
|
+
end
|
56
|
+
puts "\n" unless ids.grabbed.to_a.last[0] == social_network
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def parse(ids)
|
61
|
+
ids.parse
|
62
|
+
ids.parsed.each do |social_network, permalinks_array|
|
63
|
+
puts social_network.to_s.capitalize + ': '
|
64
|
+
permalinks_array.each do |permalink|
|
65
|
+
puts " #{permalink}"
|
66
|
+
end
|
67
|
+
puts "\n" unless ids.parsed.to_a.last[0] == social_network
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def recognize(ids)
|
72
|
+
ids.recognize
|
73
|
+
unless ids.recognized.empty?
|
74
|
+
puts 'Recognized:'
|
75
|
+
ids.recognized.each do |social_network, urls_array|
|
76
|
+
puts " #{social_network.to_s.capitalize}: "
|
77
|
+
urls_array.each do |url|
|
78
|
+
puts " #{url}"
|
79
|
+
end
|
80
|
+
puts "\n"
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
unless ids.unrecognized.empty?
|
85
|
+
puts 'Unrecognized:'
|
86
|
+
ids.unrecognized.each do |url|
|
87
|
+
puts " #{url}"
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def help
|
93
|
+
puts <<-HELP.gsub(/^ {8}/, '')
|
94
|
+
IDs, please
|
95
|
+
Grab some hidden in html data from social account page
|
96
|
+
Get social network IDs or screen names from links to social network accounts
|
97
|
+
|
98
|
+
Usage:
|
99
|
+
ids_please command [links]
|
100
|
+
|
101
|
+
Available commands:
|
102
|
+
grab grab some hidden in html data from social account page (avatar, username, id...)
|
103
|
+
parse get screen names from links to social network accounts
|
104
|
+
recognize check that the link is for a known social network
|
105
|
+
|
106
|
+
Examples:
|
107
|
+
ids_please grab https://instagram.com/microsoft
|
108
|
+
ids_please parse https://facebook.com/Microsoft https://instagram.com/microsoft
|
109
|
+
HELP
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -8,13 +8,20 @@ class IdsPlease
|
|
8
8
|
links.map { |l| self.new(l).grab_link }
|
9
9
|
end
|
10
10
|
|
11
|
-
attr_reader :avatar,
|
11
|
+
attr_reader :avatar,
|
12
|
+
:display_name,
|
13
|
+
:username,
|
14
|
+
:link,
|
15
|
+
:page_source,
|
16
|
+
:network_id,
|
17
|
+
:data,
|
18
|
+
:counts
|
12
19
|
|
13
20
|
def initialize(link)
|
14
21
|
@link = link
|
15
22
|
end
|
16
23
|
|
17
|
-
def grab_link(
|
24
|
+
def grab_link(_link)
|
18
25
|
throw 'Base grabber can not grab anything'
|
19
26
|
end
|
20
27
|
|
@@ -48,6 +55,19 @@ class IdsPlease
|
|
48
55
|
def page_source
|
49
56
|
@page_source ||= open(link).read
|
50
57
|
end
|
58
|
+
|
59
|
+
def errors
|
60
|
+
@errors ||= []
|
61
|
+
end
|
62
|
+
|
63
|
+
def record_error(event, message)
|
64
|
+
errors << "#{event} has #{message}"
|
65
|
+
end
|
66
|
+
|
67
|
+
def find_by_regex(reg)
|
68
|
+
page_source.scan(reg).flatten.first
|
69
|
+
end
|
70
|
+
|
51
71
|
end
|
52
72
|
end
|
53
73
|
end
|
@@ -3,43 +3,96 @@ class IdsPlease
|
|
3
3
|
class Facebook < IdsPlease::Grabbers::Base
|
4
4
|
|
5
5
|
def grab_link
|
6
|
-
@network_id =
|
7
|
-
@avatar =
|
8
|
-
@display_name =
|
9
|
-
@username =
|
10
|
-
|
11
|
-
@display_name = CGI.unescapeHTML(@display_name.encode('utf-8')) if @display_name
|
12
|
-
@data = {}
|
13
|
-
{
|
14
|
-
type: page_source.scan(/og:type" content="([^"]+)"/).flatten.first.to_s.encode('utf-8'),
|
15
|
-
description: page_source.scan(/og:description" content="([^"]+)"/).flatten.first.to_s.encode('utf-8'),
|
16
|
-
}.each do |k, v|
|
17
|
-
next if v.nil? || v == ''
|
18
|
-
@data[k] = CGI.unescapeHTML(v).strip
|
19
|
-
end
|
6
|
+
@network_id = find_network_id
|
7
|
+
@avatar = find_avatar
|
8
|
+
@display_name = find_display_name
|
9
|
+
@username = find_username
|
10
|
+
|
20
11
|
@counts = {
|
21
|
-
likes:
|
22
|
-
visits:
|
23
|
-
}.delete_if {|
|
12
|
+
likes: find_likes,
|
13
|
+
visits: find_visits
|
14
|
+
}.delete_if { |_k, v| v.nil? }
|
15
|
+
|
16
|
+
@data = {
|
17
|
+
type: find_type,
|
18
|
+
description: find_description
|
19
|
+
}.delete_if { |_k, v| v.nil? }
|
20
|
+
|
24
21
|
self
|
25
22
|
rescue => e
|
26
|
-
|
23
|
+
record_error __method__, e.message
|
27
24
|
return self
|
28
25
|
end
|
29
26
|
|
30
|
-
|
31
|
-
|
27
|
+
private
|
28
|
+
|
29
|
+
def find_network_id
|
30
|
+
find_by_regex(/entity_id":"(\d+)"/)
|
31
|
+
rescue => e
|
32
|
+
record_error __method__, e.message
|
33
|
+
return nil
|
34
|
+
end
|
35
|
+
|
36
|
+
def find_avatar
|
37
|
+
CGI.unescapeHTML(
|
38
|
+
find_by_regex(/profilePic\simg"\salt=[^=]+="([^"]+)/).encode('utf-8')
|
39
|
+
)
|
40
|
+
rescue => e
|
41
|
+
record_error __method__, e.message
|
42
|
+
return nil
|
43
|
+
end
|
44
|
+
|
45
|
+
def find_display_name
|
46
|
+
CGI.unescapeHTML(
|
47
|
+
find_by_regex(/pageTitle">([^<\|]+)/).strip.encode('utf-8')
|
48
|
+
)
|
49
|
+
rescue => e
|
50
|
+
record_error __method__, e.message
|
51
|
+
return nil
|
52
|
+
end
|
53
|
+
|
54
|
+
def find_username
|
55
|
+
find_by_regex(/link\srel="canonical"\shref="https:\/\/facebook\.com\/([^"]+)/) ||
|
56
|
+
find_by_regex(/;\sURL=\/([^\/\?]+)/)
|
57
|
+
rescue => e
|
58
|
+
record_error __method__, e.message
|
59
|
+
return nil
|
60
|
+
end
|
61
|
+
|
62
|
+
def find_type
|
63
|
+
find_by_regex(/type":"Person/) ? 'perosnal' : 'group'
|
64
|
+
rescue => e
|
65
|
+
record_error __method__, e.message
|
66
|
+
return nil
|
67
|
+
end
|
68
|
+
|
69
|
+
def find_description
|
70
|
+
CGI.unescapeHTML(
|
71
|
+
find_by_regex(/name="description" content="([^"]+)"/).encode('utf-8')
|
72
|
+
).strip
|
73
|
+
rescue => e
|
74
|
+
record_error __method__, e.message
|
75
|
+
return nil
|
76
|
+
end
|
77
|
+
|
78
|
+
def find_likes
|
79
|
+
if likes = find_by_regex(/>([^"]+) <span class=".+">likes/)
|
80
|
+
likes.tr(',', '').to_i
|
81
|
+
end
|
32
82
|
rescue => e
|
33
|
-
|
83
|
+
record_error __method__, e.message
|
34
84
|
return nil
|
35
85
|
end
|
36
86
|
|
37
|
-
def
|
38
|
-
|
87
|
+
def find_visits
|
88
|
+
if visits = find_by_regex(/likes.+>([^"]+)<\/span> <span class=".+">visits/)
|
89
|
+
visits.tr(',', '').to_i
|
90
|
+
end
|
39
91
|
rescue => e
|
40
|
-
|
92
|
+
record_error __method__, e.message
|
41
93
|
return nil
|
42
94
|
end
|
95
|
+
|
43
96
|
end
|
44
97
|
end
|
45
98
|
end
|
@@ -3,23 +3,79 @@ class IdsPlease
|
|
3
3
|
class GooglePlus < IdsPlease::Grabbers::Base
|
4
4
|
|
5
5
|
def grab_link
|
6
|
-
@network_id =
|
7
|
-
@avatar =
|
8
|
-
@display_name =
|
9
|
-
@username =
|
10
|
-
|
11
|
-
description: page_source.scan(/name="Description" content="([^"]+)">/).flatten.first.to_s.encode('utf-8')
|
12
|
-
}
|
6
|
+
@network_id = find_network_id
|
7
|
+
@avatar = find_avatar
|
8
|
+
@display_name = find_display_name
|
9
|
+
@username = find_username
|
10
|
+
|
13
11
|
@counts = {
|
14
|
-
followers:
|
15
|
-
views:
|
16
|
-
}
|
12
|
+
followers: find_followers,
|
13
|
+
views: find_views
|
14
|
+
}.delete_if { |_k, v| v.nil? }
|
15
|
+
|
16
|
+
@data = {
|
17
|
+
description: find_description
|
18
|
+
}.delete_if { |_k, v| v.nil? }
|
19
|
+
|
17
20
|
self
|
18
21
|
rescue => e
|
19
|
-
|
22
|
+
record_error __method__, e.message
|
20
23
|
return self
|
21
24
|
end
|
22
25
|
|
26
|
+
def find_network_id
|
27
|
+
find_by_regex(/oid="(\d+)"/)
|
28
|
+
rescue => e
|
29
|
+
record_error __method__, e.message
|
30
|
+
return nil
|
31
|
+
end
|
32
|
+
|
33
|
+
def find_avatar
|
34
|
+
"https:#{find_by_regex(/guidedhelpid="profile_photo"><img src="([^"]+)"/)}"
|
35
|
+
rescue => e
|
36
|
+
record_error __method__, e.message
|
37
|
+
return nil
|
38
|
+
end
|
39
|
+
|
40
|
+
def find_display_name
|
41
|
+
find_by_regex(/og:title" content="([^"]+)"/).gsub(' - Google+', '')
|
42
|
+
rescue => e
|
43
|
+
record_error __method__, e.message
|
44
|
+
return nil
|
45
|
+
end
|
46
|
+
|
47
|
+
def find_username
|
48
|
+
"+#{find_by_regex(/"https:\/\/plus.google.com\/\+(.+?)"/)}"
|
49
|
+
rescue => e
|
50
|
+
record_error __method__, e.message
|
51
|
+
return nil
|
52
|
+
end
|
53
|
+
|
54
|
+
def find_description
|
55
|
+
find_by_regex(/name="Description" content="([^"]+)">/).encode('utf-8')
|
56
|
+
rescue => e
|
57
|
+
record_error __method__, e.message
|
58
|
+
return nil
|
59
|
+
end
|
60
|
+
|
61
|
+
def find_followers
|
62
|
+
if followers = find_by_regex(/">([^"]+)<\/span> followers</)
|
63
|
+
followers.tr(',', '').to_i
|
64
|
+
end
|
65
|
+
rescue => e
|
66
|
+
record_error __method__, e.message
|
67
|
+
return nil
|
68
|
+
end
|
69
|
+
|
70
|
+
def find_views
|
71
|
+
if views = find_by_regex(/">([^"]+)<\/span> views</)
|
72
|
+
views.tr(',', '').to_i
|
73
|
+
end
|
74
|
+
rescue => e
|
75
|
+
record_error __method__, e.message
|
76
|
+
return nil
|
77
|
+
end
|
78
|
+
|
23
79
|
end
|
24
80
|
end
|
25
81
|
end
|
@@ -5,25 +5,96 @@ class IdsPlease
|
|
5
5
|
class Instagram < IdsPlease::Grabbers::Base
|
6
6
|
|
7
7
|
def grab_link
|
8
|
-
@network_id =
|
9
|
-
@avatar =
|
10
|
-
@display_name =
|
11
|
-
@username =
|
12
|
-
|
13
|
-
bio: page_source.scan(/"biography":"([^"]+)"/).flatten.first,
|
14
|
-
website: page_source.scan(/"user":{.+"external_url":"([^"]+)"/).flatten.first.gsub('\\', ''),
|
15
|
-
}
|
8
|
+
@network_id = find_network_id
|
9
|
+
@avatar = find_avatar
|
10
|
+
@display_name = find_display_name
|
11
|
+
@username = find_username
|
12
|
+
|
16
13
|
@counts = {
|
17
|
-
media:
|
18
|
-
followed_by:
|
19
|
-
follows:
|
20
|
-
}
|
21
|
-
|
14
|
+
media: find_media,
|
15
|
+
followed_by: find_followed_by,
|
16
|
+
follows: find_follows
|
17
|
+
}.delete_if { |_k, v| v.nil? }
|
18
|
+
|
19
|
+
@data = {
|
20
|
+
bio: find_bio,
|
21
|
+
website: find_website
|
22
|
+
}.delete_if { |_k, v| v.nil? }
|
23
|
+
|
22
24
|
self
|
23
25
|
rescue => e
|
24
|
-
|
26
|
+
record_error __method__, e.message
|
25
27
|
return self
|
26
28
|
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def find_network_id
|
33
|
+
find_by_regex(/"user":{.+"id":"(\d+)"/)
|
34
|
+
rescue => e
|
35
|
+
record_error __method__, e.message
|
36
|
+
return nil
|
37
|
+
end
|
38
|
+
|
39
|
+
def find_avatar
|
40
|
+
find_by_regex(/"user":{.+"profile_pic_url":"([^"]+)"/).gsub('\\', '')
|
41
|
+
rescue => e
|
42
|
+
record_error __method__, e.message
|
43
|
+
return nil
|
44
|
+
end
|
45
|
+
|
46
|
+
def find_display_name
|
47
|
+
_display_name = find_by_regex(/"user":{.+"full_name":"([^"]+)"/)
|
48
|
+
_display_name.gsub(/\\u([\da-fA-F]{4})/) { |_m|
|
49
|
+
[Regexp.last_match(1)].pack('H*').unpack('n*').pack('U*')
|
50
|
+
}
|
51
|
+
rescue => e
|
52
|
+
record_error __method__, e.message
|
53
|
+
return nil
|
54
|
+
end
|
55
|
+
|
56
|
+
def find_username
|
57
|
+
find_by_regex(/"user":{"username":"([^"]+)"/).gsub('\\', '')
|
58
|
+
rescue => e
|
59
|
+
record_error __method__, e.message
|
60
|
+
return nil
|
61
|
+
end
|
62
|
+
|
63
|
+
def find_bio
|
64
|
+
CGI.unescapeHTML(find_by_regex(/"biography":"([^"]+)"/)).strip
|
65
|
+
rescue => e
|
66
|
+
record_error __method__, e.message
|
67
|
+
return nil
|
68
|
+
end
|
69
|
+
|
70
|
+
def find_website
|
71
|
+
CGI.unescapeHTML(find_by_regex(/"user":{.+"external_url":"([^"]+)"/).gsub('\\', '')).strip
|
72
|
+
rescue => e
|
73
|
+
record_error __method__, e.message
|
74
|
+
return nil
|
75
|
+
end
|
76
|
+
|
77
|
+
def find_media
|
78
|
+
find_by_regex(/"media":{"count":(\d+)/).to_i
|
79
|
+
rescue => e
|
80
|
+
record_error __method__, e.message
|
81
|
+
return nil
|
82
|
+
end
|
83
|
+
|
84
|
+
def find_followed_by
|
85
|
+
find_by_regex(/"followed_by":{"count":(\d+)/).to_i
|
86
|
+
rescue => e
|
87
|
+
record_error __method__, e.message
|
88
|
+
return nil
|
89
|
+
end
|
90
|
+
|
91
|
+
def find_follows
|
92
|
+
find_by_regex(/"follows":{"count":(\d+)/).to_i
|
93
|
+
rescue => e
|
94
|
+
record_error __method__, e.message
|
95
|
+
return nil
|
96
|
+
end
|
97
|
+
|
27
98
|
end
|
28
99
|
end
|
29
100
|
end
|
@@ -9,17 +9,17 @@ class IdsPlease
|
|
9
9
|
uid_url = "http://appsmail.ru/platform/#{link.split('/')[-2..-1].join('/')}"
|
10
10
|
@network_id = JSON.parse(open(uid_url).read)['uid']
|
11
11
|
@username, type = get_name_and_type(link)
|
12
|
-
@avatar =
|
13
|
-
@display_name =
|
12
|
+
@avatar = find_by_regex(/profile__avatar" src="([^"]+)/)
|
13
|
+
@display_name = find_by_regex(/h1.+title="([^"]+)/)
|
14
14
|
@display_name = CGI.unescapeHTML(@display_name) if @display_name
|
15
15
|
@data = {
|
16
16
|
type: type,
|
17
|
-
description:
|
17
|
+
description: find_by_regex(/profile__content_mainInfo" title="([^"]+)/)
|
18
18
|
}
|
19
19
|
@data[:description] = CGI.unescapeHTML(@data[:description]) if @data[:description]
|
20
20
|
self
|
21
21
|
rescue => e
|
22
|
-
|
22
|
+
record_error __method__, e.message
|
23
23
|
return self
|
24
24
|
end
|
25
25
|
|
@@ -3,32 +3,118 @@ class IdsPlease
|
|
3
3
|
class Twitter < IdsPlease::Grabbers::Base
|
4
4
|
|
5
5
|
def grab_link
|
6
|
-
@network_id =
|
7
|
-
@avatar =
|
8
|
-
@display_name =
|
9
|
-
@username =
|
10
|
-
|
11
|
-
{
|
12
|
-
description: page_source.scan(/ProfileHeaderCard-bio[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
|
13
|
-
location: page_source.scan(/ProfileHeaderCard-locationText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
|
14
|
-
join_date: page_source.scan(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
|
15
|
-
}.each do |k, v|
|
16
|
-
next if v.nil? || v == ''
|
17
|
-
@data[k] = CGI.unescapeHTML(v).strip
|
18
|
-
end
|
6
|
+
@network_id = find_network_id
|
7
|
+
@avatar = find_avatar
|
8
|
+
@display_name = find_display_name
|
9
|
+
@username = find_username
|
10
|
+
|
19
11
|
@counts = {
|
20
|
-
tweets:
|
21
|
-
following:
|
22
|
-
followers:
|
23
|
-
favorites:
|
24
|
-
lists:
|
25
|
-
}
|
12
|
+
tweets: find_tweets,
|
13
|
+
following: find_following,
|
14
|
+
followers: find_followers,
|
15
|
+
favorites: find_favorites,
|
16
|
+
lists: find_listed
|
17
|
+
}.delete_if { |_k, v| v.nil? }
|
18
|
+
|
19
|
+
@data = {
|
20
|
+
description: find_description,
|
21
|
+
location: find_location,
|
22
|
+
join_date: find_join_date
|
23
|
+
}.delete_if { |_k, v| v.nil? }
|
24
|
+
|
26
25
|
self
|
27
26
|
rescue => e
|
28
|
-
|
27
|
+
record_error __method__, e.message
|
29
28
|
return self
|
30
29
|
end
|
31
30
|
|
31
|
+
def find_network_id
|
32
|
+
find_by_regex(/data-user-id="(\d+)"/)
|
33
|
+
rescue => e
|
34
|
+
record_error __method__, e.message
|
35
|
+
return nil
|
36
|
+
end
|
37
|
+
|
38
|
+
def find_avatar
|
39
|
+
find_by_regex(/ProfileAvatar-image " src="([^"]+)"/)
|
40
|
+
rescue => e
|
41
|
+
record_error __method__, e.message
|
42
|
+
return nil
|
43
|
+
end
|
44
|
+
|
45
|
+
def find_display_name
|
46
|
+
find_by_regex(/ProfileHeaderCard-nameLink[^>]+>([^<]+)</)
|
47
|
+
rescue => e
|
48
|
+
record_error __method__, e.message
|
49
|
+
return nil
|
50
|
+
end
|
51
|
+
|
52
|
+
def find_username
|
53
|
+
find_by_regex(/<title>[^\(]+\(@([^\)]+)\)/)
|
54
|
+
rescue => e
|
55
|
+
record_error __method__, e.message
|
56
|
+
return nil
|
57
|
+
end
|
58
|
+
|
59
|
+
def find_description
|
60
|
+
_desc = find_by_regex(/ProfileHeaderCard-bio[^>]+>([^<]+)</)
|
61
|
+
CGI.unescapeHTML(_desc.encode('utf-8')).strip
|
62
|
+
rescue => e
|
63
|
+
record_error __method__, e.message
|
64
|
+
return nil
|
65
|
+
end
|
66
|
+
|
67
|
+
def find_location
|
68
|
+
_loc = find_by_regex(/ProfileHeaderCard-locationText[^>]+>([^<]+)</)
|
69
|
+
CGI.unescapeHTML(_loc.encode('utf-8')).strip
|
70
|
+
rescue => e
|
71
|
+
record_error __method__, e.message
|
72
|
+
return nil
|
73
|
+
end
|
74
|
+
|
75
|
+
def find_join_date
|
76
|
+
_date = find_by_regex(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</)
|
77
|
+
CGI.unescapeHTML(_date.encode('utf-8')).strip
|
78
|
+
rescue => e
|
79
|
+
record_error __method__, e.message
|
80
|
+
return nil
|
81
|
+
end
|
82
|
+
|
83
|
+
def find_tweets
|
84
|
+
find_by_regex(/statuses_count":(\d+),"/).to_i
|
85
|
+
rescue => e
|
86
|
+
record_error __method__, e.message
|
87
|
+
return nil
|
88
|
+
end
|
89
|
+
|
90
|
+
def find_followers
|
91
|
+
find_by_regex(/followers_count":(\d+),"/).to_i
|
92
|
+
rescue => e
|
93
|
+
record_error __method__, e.message
|
94
|
+
return nil
|
95
|
+
end
|
96
|
+
|
97
|
+
def find_following
|
98
|
+
find_by_regex(/friends_count":(\d+),"/).to_i
|
99
|
+
rescue => e
|
100
|
+
record_error __method__, e.message
|
101
|
+
return nil
|
102
|
+
end
|
103
|
+
|
104
|
+
def find_favorites
|
105
|
+
find_by_regex(/favourites_count":(\d+),"/).to_i
|
106
|
+
rescue => e
|
107
|
+
record_error __method__, e.message
|
108
|
+
return nil
|
109
|
+
end
|
110
|
+
|
111
|
+
def find_listed
|
112
|
+
find_by_regex(/listed_count":(\d+),"/).to_i
|
113
|
+
rescue => e
|
114
|
+
record_error __method__, e.message
|
115
|
+
return nil
|
116
|
+
end
|
117
|
+
|
32
118
|
end
|
33
119
|
end
|
34
120
|
end
|
@@ -1,26 +1,24 @@
|
|
1
1
|
class IdsPlease
|
2
2
|
module Grabbers
|
3
3
|
class Vkontakte < IdsPlease::Grabbers::Base
|
4
|
-
|
5
4
|
def grab_link
|
6
5
|
agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36'
|
7
6
|
@page_source ||= open(link, 'User-Agent' => agent).read.encode('utf-8')
|
8
|
-
@network_id =
|
7
|
+
@network_id = find_by_regex(/href="\/wall(-\d+)_/)
|
9
8
|
@username = @link.to_s.split('vk.com/').last.gsub('/', '')
|
10
|
-
@avatar =
|
9
|
+
@avatar = find_by_regex(/page_avatar.+\n.+src="([^"]+)/)
|
11
10
|
@avatar = CGI.unescapeHTML(@avatar) if @avatar
|
12
|
-
@display_name =
|
11
|
+
@display_name = find_by_regex(/page_name">([^<]+)/)
|
13
12
|
@display_name = CGI.unescapeHTML(@display_name) if @display_name
|
14
13
|
@data = {
|
15
|
-
description:
|
14
|
+
description: find_by_regex(/description" content="([^"]+)/)
|
16
15
|
}
|
17
16
|
@data[:description] = CGI.unescapeHTML(@data[:description]) if @data[:description]
|
18
17
|
self
|
19
18
|
rescue => e
|
20
|
-
|
19
|
+
record_error __method__, e.message
|
21
20
|
return self
|
22
21
|
end
|
23
|
-
|
24
22
|
end
|
25
23
|
end
|
26
24
|
end
|
data/lib/ids_please/grabbers.rb
CHANGED
@@ -8,14 +8,14 @@ require_relative 'grabbers/google_plus'
|
|
8
8
|
|
9
9
|
class IdsPlease
|
10
10
|
module Grabbers
|
11
|
-
|
11
|
+
|
12
12
|
NETWORKS = {
|
13
13
|
facebook: IdsPlease::Grabbers::Facebook,
|
14
14
|
vkontakte: IdsPlease::Grabbers::Vkontakte,
|
15
15
|
twitter: IdsPlease::Grabbers::Twitter,
|
16
16
|
instagram: IdsPlease::Grabbers::Instagram,
|
17
17
|
mailru: IdsPlease::Grabbers::Mailru,
|
18
|
-
google_plus: IdsPlease::Grabbers::GooglePlus
|
18
|
+
google_plus: IdsPlease::Grabbers::GooglePlus
|
19
19
|
}
|
20
20
|
|
21
21
|
def self.each
|
@@ -25,6 +25,6 @@ class IdsPlease
|
|
25
25
|
def self.by_symbol(sym)
|
26
26
|
NETWORKS[sym]
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
end
|
30
30
|
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
class IdsPlease
|
2
2
|
module Parsers
|
3
3
|
class Base
|
4
|
-
|
5
4
|
class << self
|
5
|
+
|
6
6
|
def to_sym
|
7
|
-
|
7
|
+
name.split('::').last.downcase.to_sym
|
8
8
|
end
|
9
9
|
|
10
10
|
def interact(links)
|
@@ -15,8 +15,6 @@ class IdsPlease
|
|
15
15
|
end.compact
|
16
16
|
end
|
17
17
|
|
18
|
-
private
|
19
|
-
|
20
18
|
def parse_link(link)
|
21
19
|
link.path.split('/')[1]
|
22
20
|
end
|
@@ -24,8 +22,8 @@ class IdsPlease
|
|
24
22
|
def valid_id_regex
|
25
23
|
/\A([\w\.\+-]{2,})/
|
26
24
|
end
|
27
|
-
end
|
28
25
|
|
26
|
+
end
|
29
27
|
end
|
30
28
|
end
|
31
29
|
end
|
data/lib/ids_please/parsers.rb
CHANGED
@@ -44,13 +44,13 @@ class IdsPlease
|
|
44
44
|
mailru: IdsPlease::Parsers::Mailru
|
45
45
|
}
|
46
46
|
|
47
|
-
def self.each
|
48
|
-
NETWORKS.values.each { |n| yield n }
|
49
|
-
end
|
50
|
-
|
51
47
|
def self.by_symbol(sym)
|
52
48
|
NETWORKS[sym]
|
53
49
|
end
|
54
50
|
|
51
|
+
def self.to_a
|
52
|
+
NETWORKS.values
|
53
|
+
end
|
54
|
+
|
55
55
|
end
|
56
56
|
end
|
data/lib/ids_please/version.rb
CHANGED
data/lib/ids_please.rb
CHANGED
@@ -26,7 +26,7 @@ class IdsPlease
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def recognized
|
29
|
-
Hash[@recognized.map { |parser, links| [
|
29
|
+
Hash[@recognized.map { |parser, links| [parser.to_sym, links] }]
|
30
30
|
end
|
31
31
|
|
32
32
|
def parse
|
@@ -39,22 +39,22 @@ class IdsPlease
|
|
39
39
|
|
40
40
|
private
|
41
41
|
|
42
|
-
def interact(
|
42
|
+
def interact(interactors_group = :parsers)
|
43
43
|
recognize
|
44
44
|
interacted = Hash.new { |hash, network| hash[network] = [] }
|
45
45
|
@recognized.each do |network, links|
|
46
|
-
interactor = IdsPlease.send(
|
46
|
+
interactor = IdsPlease.send(interactors_group).by_symbol(network)
|
47
47
|
interacted[network].concat interactor.interact(links)
|
48
48
|
end
|
49
|
-
|
49
|
+
instance_variable_set(interacted_var(interactors_group), interacted)
|
50
50
|
|
51
51
|
interacted
|
52
52
|
end
|
53
53
|
|
54
|
-
def interacted_var(
|
55
|
-
if
|
54
|
+
def interacted_var(interactors_group)
|
55
|
+
if interactors_group == :parsers
|
56
56
|
:@parsed
|
57
|
-
elsif
|
57
|
+
elsif interactors_group == :grabbers
|
58
58
|
:@grabbed
|
59
59
|
else
|
60
60
|
throw 'Wrong interactors type'
|
@@ -64,14 +64,14 @@ class IdsPlease
|
|
64
64
|
def recognize_link(link)
|
65
65
|
link = "http://#{link}" unless link =~ /\Ahttps?:\/\//
|
66
66
|
parsed_link = URI(URI.encode(link))
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
67
|
+
|
68
|
+
network = IdsPlease::Parsers.to_a.find { |n| parsed_link.host =~ n::MASK }
|
69
|
+
|
70
|
+
if network
|
71
|
+
@recognized[network.to_sym] ||= []
|
72
|
+
@recognized[network.to_sym] << parsed_link
|
73
|
+
else
|
74
|
+
@unrecognized << link
|
73
75
|
end
|
74
|
-
unrecognized << link
|
75
76
|
end
|
76
|
-
|
77
77
|
end
|
@@ -1,51 +1,49 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe IdsPlease do
|
4
|
-
|
5
4
|
recognazible_links = %w(
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
5
|
+
https://www.facebook.com/fb_acc
|
6
|
+
https://facebook.com/fb_acc2<U+200>
|
7
|
+
http://instagram.com/inst_acc
|
8
|
+
http://hi5.com/hi5_acc
|
9
|
+
http://www.hi5.com/profile.html?uid=12341234
|
10
|
+
http://pinterest.com/pinterest_acc
|
11
|
+
http://blogger-acc.blogspot.com
|
12
|
+
http://livejournal-acc.livejournal.com
|
13
|
+
http://livejournal-acc2.livejournal.ru
|
14
|
+
https://www.blogger.com/blogger.g?blogID=12341234#overview/src=dashboard
|
15
|
+
http://vk.com/vk_acc
|
16
|
+
http://linkedin.com/in/xnutsive
|
17
|
+
http://www.linkedin.com/company/evil-martians
|
18
|
+
http://www.linkedin.com/profile/view?id=12341234&trk=nav_responsive_tab_profile
|
19
|
+
http://Ameblo.jp/ameba_acc
|
20
|
+
http://reddit.com/user/reddit_acc
|
21
|
+
https://twitter.com/twi_acc
|
22
|
+
https://vimeo.com/vimeo_acc
|
23
|
+
https://plus.google.com/12341234
|
24
|
+
https://plus.google.com/+VladimirBokov
|
25
|
+
https://soundcloud.com/sc_acc
|
26
|
+
https://youtube.com/channels/yb_acc
|
27
|
+
http://tumblr-acc.tumblr.com
|
28
|
+
http://odnoklassniki.com/profile/12341234/about
|
29
|
+
http://ok.ru/profile/12341234/about
|
30
|
+
http://odnoklassniki.com/group/43214321/about?some=123
|
31
|
+
http://moikrug-acc.moikrug.ru
|
32
|
+
https://my.mail.ru/community/test-group-102/
|
33
|
+
https://my.mail.ru/mail/gazay/
|
34
|
+
)
|
36
35
|
|
37
36
|
not_recognazible_links = %w(
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
http://fucebook.com/not_recognized
|
38
|
+
http://vka.com/not_recognized
|
39
|
+
)
|
41
40
|
|
42
41
|
not_parseble_links = %w(
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
http://vk.com
|
43
|
+
http://soundcloud.com
|
44
|
+
)
|
46
45
|
|
47
46
|
describe 'recognize' do
|
48
|
-
|
49
47
|
it 'not recognizes wrong links' do
|
50
48
|
recognizer = IdsPlease.new(*not_recognazible_links)
|
51
49
|
recognizer.recognize
|
@@ -138,13 +136,10 @@ describe IdsPlease do
|
|
138
136
|
it 'recognizes mailru links' do
|
139
137
|
expect(@recognizer.recognized[:mailru].count).to eq(2)
|
140
138
|
end
|
141
|
-
|
142
139
|
end
|
143
|
-
|
144
140
|
end
|
145
141
|
|
146
142
|
describe 'parse' do
|
147
|
-
|
148
143
|
it 'not parse wrong links' do
|
149
144
|
@recognizer = IdsPlease.new(*not_parseble_links)
|
150
145
|
@recognizer.parse
|
@@ -170,7 +165,7 @@ describe IdsPlease do
|
|
170
165
|
end
|
171
166
|
|
172
167
|
it 'get right id from facebook link' do
|
173
|
-
expect(@recognizer.parsed[:facebook]).to eq(
|
168
|
+
expect(@recognizer.parsed[:facebook]).to eq(%w(fb_acc fb_acc2))
|
174
169
|
end
|
175
170
|
|
176
171
|
it 'get right id from linkedin link' do
|
@@ -210,7 +205,7 @@ describe IdsPlease do
|
|
210
205
|
end
|
211
206
|
|
212
207
|
it 'get right id from hi5 link' do
|
213
|
-
expect(@recognizer.parsed[:hi5]).to eq(
|
208
|
+
expect(@recognizer.parsed[:hi5]).to eq(%w(hi5_acc 12341234))
|
214
209
|
end
|
215
210
|
|
216
211
|
it 'get right id from soundcloud link' do
|
@@ -234,14 +229,12 @@ describe IdsPlease do
|
|
234
229
|
end
|
235
230
|
|
236
231
|
it 'get right id from odnoklassniki link' do
|
237
|
-
expect(@recognizer.parsed[:odnoklassniki].sort).to eq(
|
232
|
+
expect(@recognizer.parsed[:odnoklassniki].sort).to eq(%w(12341234 43214321 12341234).sort)
|
238
233
|
end
|
239
234
|
|
240
235
|
it 'get right id from moikrug link' do
|
241
236
|
expect(@recognizer.parsed[:moikrug].first).to eq('moikrug-acc')
|
242
237
|
end
|
243
|
-
|
244
238
|
end
|
245
239
|
end
|
246
|
-
|
247
240
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ids_please
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.
|
4
|
+
version: 2.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- gazay
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -40,7 +40,8 @@ dependencies:
|
|
40
40
|
version: '0'
|
41
41
|
description: Helps to get ids or screen names from links to social network accounts
|
42
42
|
email: alex.gaziev@gmail.com
|
43
|
-
executables:
|
43
|
+
executables:
|
44
|
+
- ids_please
|
44
45
|
extensions: []
|
45
46
|
extra_rdoc_files:
|
46
47
|
- LICENSE
|
@@ -51,8 +52,10 @@ files:
|
|
51
52
|
- LICENSE
|
52
53
|
- README.md
|
53
54
|
- Rakefile
|
55
|
+
- bin/ids_please
|
54
56
|
- ids_please.gemspec
|
55
57
|
- lib/ids_please.rb
|
58
|
+
- lib/ids_please/cli.rb
|
56
59
|
- lib/ids_please/grabbers.rb
|
57
60
|
- lib/ids_please/grabbers/base.rb
|
58
61
|
- lib/ids_please/grabbers/facebook.rb
|
@@ -106,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
109
|
version: '0'
|
107
110
|
requirements: []
|
108
111
|
rubyforge_project:
|
109
|
-
rubygems_version: 2.4.5
|
112
|
+
rubygems_version: 2.4.5.1
|
110
113
|
signing_key:
|
111
114
|
specification_version: 4
|
112
115
|
summary: Helps to get ids or screen names from links to social network accounts
|