ids_please 2.2.0 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cbbb7fdc96da69f11a60cfbda149c1efc18cf3ec
4
- data.tar.gz: a32ec832041c194f72d78aae0f410688edbd3239
3
+ metadata.gz: 728f2add26771adaaedbb2afb42fa59126e19a10
4
+ data.tar.gz: 56285e208b28c26303362a8070b3e538eecac37a
5
5
  SHA512:
6
- metadata.gz: e903c0a4695122ade18201fe6f33035644c2193cd13e1f950426da6ba98de85ccb4953f6aab4069288652b202441ed4992b09cd51e13ea34d544e8afe5060ed8
7
- data.tar.gz: 94ef374b9075560bad7f53dd829d98a499f3b84d923de4392df9f77a07afeb628a723ef8e977b9fcde2323c94084da47d726e14791c08f3d81997d64905475da
6
+ metadata.gz: a26e536a30e7461fd9396df7d1db9806affb1c9212fdc4850eb853588a706b705c2b16c5f012cff8c7d04be30e88408ffbcea8f28967109d1ef7f81c1995eaec
7
+ data.tar.gz: d051a8a184798fc5d080f65db17bc78ce265e6030748923c3a6e6f0599154342ec1ec1de626ddb077c934c086015ea59d8d0b7d68c626c53113f26306e70a75d
data/Rakefile CHANGED
@@ -4,9 +4,9 @@ require 'bundler'
4
4
  Bundler::GemHelper.install_tasks
5
5
 
6
6
  desc 'Run all tests by default'
7
- task :default => :spec
7
+ task default: :spec
8
8
 
9
9
  require 'rspec/core/rake_task'
10
10
  RSpec::Core::RakeTask.new do |t|
11
- t.rspec_opts = ["--color", '--format doc']
12
- end
11
+ t.rspec_opts = ['--color', '--format doc']
12
+ end
data/bin/ids_please ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ ids_please_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ $LOAD_PATH.unshift(ids_please_dir) unless $LOAD_PATH.include?(ids_please_dir)
4
+ require 'ids_please'
5
+ require 'ids_please/cli'
6
+
7
+ IdsPlease::CLI.run(ARGV)
data/ids_please.gemspec CHANGED
@@ -4,17 +4,19 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'ids_please/version'
5
5
 
6
6
  Gem::Specification.new do |s|
7
- s.name = "ids_please"
7
+ s.name = 'ids_please'
8
8
  s.version = IdsPlease::VERSION
9
- s.authors = ["gazay"]
10
- s.description = %q{Helps to get ids or screen names from links to social network accounts}
11
- s.summary = %q{Helps to get ids or screen names from links to social network accounts}
9
+ s.authors = ['gazay']
10
+ s.description = 'Helps to get ids or screen names from links to social network accounts'
11
+ s.summary = 'Helps to get ids or screen names from links to social network accounts'
12
12
  s.licenses = ['MIT']
13
- s.email = "alex.gaziev@gmail.com"
14
- s.extra_rdoc_files = ["LICENSE"]
15
- s.rdoc_options = ["--charset=UTF-8"]
16
- s.homepage = "http://github.com/gazay/ids_please"
17
- s.require_paths = ["lib"]
13
+ s.email = 'alex.gaziev@gmail.com'
14
+ s.extra_rdoc_files = ['LICENSE']
15
+ s.rdoc_options = ['--charset=UTF-8']
16
+ s.homepage = 'http://github.com/gazay/ids_please'
17
+ s.require_paths = ['lib']
18
+ s.bindir = 'bin'
19
+ s.executables = 'ids_please'
18
20
  s.files = `git ls-files`.split("\n")
19
21
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
22
  s.add_development_dependency 'rake'
data/lib/ids_please/cli.rb ADDED
@@ -0,0 +1,112 @@
1
+ class IdsPlease
2
+ module CLI
3
+ def self.run(args)
4
+ command = args.shift
5
+ case command
6
+ when 'grab', 'parse', 'recognize'
7
+ when 'help', nil
8
+ help
9
+ exit
10
+ else
11
+ abort "Unknown command. Enter 'ids_please help' for instructions"
12
+ end
13
+
14
+ links = args
15
+ if links.empty?
16
+ abort "You didn't enter any links. Enter 'ids_please help' for instructions"
17
+ end
18
+
19
+ ids = IdsPlease.new(*links)
20
+ case command
21
+ when 'grab'
22
+ grab(ids)
23
+ when 'parse'
24
+ parse(ids)
25
+ when 'recognize'
26
+ recognize(ids)
27
+ end
28
+ end
29
+
30
+ module_function
31
+
32
+ def grab(ids)
33
+ ids.grab
34
+ ids.grabbed.each do |social_network, grabbers_array|
35
+ puts social_network.to_s.capitalize + ': '
36
+ grabbers_array.each do |grabber|
37
+ grabber.to_h.each do |property, value|
38
+ unless value.nil? || value.to_s.empty? || property == :page_source
39
+
40
+ if value.class == Hash
41
+ value.delete_if { |_, v| v.nil? }
42
+ unless value.empty?
43
+ puts " #{property}: "
44
+ value.each do |k, v|
45
+ puts " #{k}: #{v}"
46
+ end
47
+ end
48
+ else
49
+ puts " #{property}: #{value}"
50
+ end
51
+
52
+ end
53
+ end
54
+ puts "\n" unless grabbers_array.last == grabber
55
+ end
56
+ puts "\n" unless ids.grabbed.to_a.last[0] == social_network
57
+ end
58
+ end
59
+
60
+ def parse(ids)
61
+ ids.parse
62
+ ids.parsed.each do |social_network, permalinks_array|
63
+ puts social_network.to_s.capitalize + ': '
64
+ permalinks_array.each do |permalink|
65
+ puts " #{permalink}"
66
+ end
67
+ puts "\n" unless ids.parsed.to_a.last[0] == social_network
68
+ end
69
+ end
70
+
71
+ def recognize(ids)
72
+ ids.recognize
73
+ unless ids.recognized.empty?
74
+ puts 'Recognized:'
75
+ ids.recognized.each do |social_network, urls_array|
76
+ puts " #{social_network.to_s.capitalize}: "
77
+ urls_array.each do |url|
78
+ puts " #{url}"
79
+ end
80
+ puts "\n"
81
+ end
82
+ end
83
+
84
+ unless ids.unrecognized.empty?
85
+ puts 'Unrecognized:'
86
+ ids.unrecognized.each do |url|
87
+ puts " #{url}"
88
+ end
89
+ end
90
+ end
91
+
92
+ def help
93
+ puts <<-HELP.gsub(/^ {8}/, '')
94
+ IDs, please
95
+ Grab data hidden in the HTML of a social account page
96
+ Get social network IDs or screen names from links to social network accounts
97
+
98
+ Usage:
99
+ ids_please command [links]
100
+
101
+ Available commands:
102
+ grab grab data hidden in the HTML of a social account page (avatar, username, id...)
103
+ parse get screen names from links to social network accounts
104
+ recognize check that the link is for a known social network
105
+
106
+ Examples:
107
+ ids_please grab https://instagram.com/microsoft
108
+ ids_please parse https://facebook.com/Microsoft https://instagram.com/microsoft
109
+ HELP
110
+ end
111
+ end
112
+ end
data/lib/ids_please/grabbers/base.rb CHANGED
@@ -8,13 +8,20 @@ class IdsPlease
8
8
  links.map { |l| self.new(l).grab_link }
9
9
  end
10
10
 
11
- attr_reader :avatar, :display_name, :username, :link, :page_source, :network_id, :data, :counts
11
+ attr_reader :avatar,
12
+ :display_name,
13
+ :username,
14
+ :link,
15
+ :page_source,
16
+ :network_id,
17
+ :data,
18
+ :counts
12
19
 
13
20
  def initialize(link)
14
21
  @link = link
15
22
  end
16
23
 
17
- def grab_link(link)
24
+ def grab_link(_link)
18
25
  throw 'Base grabber can not grab anything'
19
26
  end
20
27
 
@@ -48,6 +55,19 @@ class IdsPlease
48
55
  def page_source
49
56
  @page_source ||= open(link).read
50
57
  end
58
+
59
+ def errors
60
+ @errors ||= []
61
+ end
62
+
63
+ def record_error(event, message)
64
+ errors << "#{event} has #{message}"
65
+ end
66
+
67
+ def find_by_regex(reg)
68
+ page_source.scan(reg).flatten.first
69
+ end
70
+
51
71
  end
52
72
  end
53
73
  end
data/lib/ids_please/grabbers/facebook.rb CHANGED
@@ -3,43 +3,96 @@ class IdsPlease
3
3
  class Facebook < IdsPlease::Grabbers::Base
4
4
 
5
5
  def grab_link
6
- @network_id = page_source.scan(/entity_id":"(\d+)"/).flatten.first
7
- @avatar = page_source.scan(/og:image" content="([^"]+)"/).flatten.first
8
- @display_name = page_source.scan(/og:title" content="([^"]+)"/).flatten.first
9
- @username = page_source.scan(/og:url" content="[^"]+\/([^\/"]+)"/).flatten.first
10
- @avatar = CGI.unescapeHTML(@avatar.encode('utf-8')) if @avatar
11
- @display_name = CGI.unescapeHTML(@display_name.encode('utf-8')) if @display_name
12
- @data = {}
13
- {
14
- type: page_source.scan(/og:type" content="([^"]+)"/).flatten.first.to_s.encode('utf-8'),
15
- description: page_source.scan(/og:description" content="([^"]+)"/).flatten.first.to_s.encode('utf-8'),
16
- }.each do |k, v|
17
- next if v.nil? || v == ''
18
- @data[k] = CGI.unescapeHTML(v).strip
19
- end
6
+ @network_id = find_network_id
7
+ @avatar = find_avatar
8
+ @display_name = find_display_name
9
+ @username = find_username
10
+
20
11
  @counts = {
21
- likes: likes,
22
- visits: visits,
23
- }.delete_if {|k,v| v.nil? }
12
+ likes: find_likes,
13
+ visits: find_visits
14
+ }.delete_if { |_k, v| v.nil? }
15
+
16
+ @data = {
17
+ type: find_type,
18
+ description: find_description
19
+ }.delete_if { |_k, v| v.nil? }
20
+
24
21
  self
25
22
  rescue => e
26
- p e
23
+ record_error __method__, e.message
27
24
  return self
28
25
  end
29
26
 
30
- def likes
31
- page_source.scan(/>([^"]+) <span class=".+">likes/).flatten.first.to_s.tr(',','').to_i
27
+ private
28
+
29
+ def find_network_id
30
+ find_by_regex(/entity_id":"(\d+)"/)
31
+ rescue => e
32
+ record_error __method__, e.message
33
+ return nil
34
+ end
35
+
36
+ def find_avatar
37
+ CGI.unescapeHTML(
38
+ find_by_regex(/profilePic\simg"\salt=[^=]+="([^"]+)/).encode('utf-8')
39
+ )
40
+ rescue => e
41
+ record_error __method__, e.message
42
+ return nil
43
+ end
44
+
45
+ def find_display_name
46
+ CGI.unescapeHTML(
47
+ find_by_regex(/pageTitle">([^<\|]+)/).strip.encode('utf-8')
48
+ )
49
+ rescue => e
50
+ record_error __method__, e.message
51
+ return nil
52
+ end
53
+
54
+ def find_username
55
+ find_by_regex(/link\srel="canonical"\shref="https:\/\/facebook\.com\/([^"]+)/) ||
56
+ find_by_regex(/;\sURL=\/([^\/\?]+)/)
57
+ rescue => e
58
+ record_error __method__, e.message
59
+ return nil
60
+ end
61
+
62
+ def find_type
63
+ find_by_regex(/type":"Person/) ? 'personal' : 'group'
64
+ rescue => e
65
+ record_error __method__, e.message
66
+ return nil
67
+ end
68
+
69
+ def find_description
70
+ CGI.unescapeHTML(
71
+ find_by_regex(/name="description" content="([^"]+)"/).encode('utf-8')
72
+ ).strip
73
+ rescue => e
74
+ record_error __method__, e.message
75
+ return nil
76
+ end
77
+
78
+ def find_likes
79
+ if likes = find_by_regex(/>([^"]+) <span class=".+">likes/)
80
+ likes.tr(',', '').to_i
81
+ end
32
82
  rescue => e
33
- p e
83
+ record_error __method__, e.message
34
84
  return nil
35
85
  end
36
86
 
37
- def visits
38
- page_source.scan(/likes.+>([^"]+)<\/span> <span class=".+">visits/).flatten.first.to_s.tr(',','').to_i
87
+ def find_visits
88
+ if visits = find_by_regex(/likes.+>([^"]+)<\/span> <span class=".+">visits/)
89
+ visits.tr(',', '').to_i
90
+ end
39
91
  rescue => e
40
- p e
92
+ record_error __method__, e.message
41
93
  return nil
42
94
  end
95
+
43
96
  end
44
97
  end
45
98
  end
@@ -3,23 +3,79 @@ class IdsPlease
3
3
  class GooglePlus < IdsPlease::Grabbers::Base
4
4
 
5
5
  def grab_link
6
- @network_id = page_source.scan(/data-oid="(\d+)"/).flatten.first
7
- @avatar = 'https:' + page_source.scan(/guidedhelpid="profile_photo"><img src="([^"]+)"/).flatten.first
8
- @display_name = page_source.scan(/og:title" content="([^"]+)"/).flatten.first.gsub(' - Google+','')
9
- @username = '+' + page_source.scan(/&quot;https:\/\/plus.google.com\/\+(.+?)&quot;/).flatten.first
10
- @data = {
11
- description: page_source.scan(/name="Description" content="([^"]+)">/).flatten.first.to_s.encode('utf-8')
12
- }
6
+ @network_id = find_network_id
7
+ @avatar = find_avatar
8
+ @display_name = find_display_name
9
+ @username = find_username
10
+
13
11
  @counts = {
14
- followers: page_source.scan(/">([^"]+)<\/span> followers</).flatten.first.to_s.tr(',','').to_i,
15
- views: page_source.scan(/">([^"]+)<\/span> views</).flatten.first.to_s.tr(',','').to_i,
16
- }
12
+ followers: find_followers,
13
+ views: find_views
14
+ }.delete_if { |_k, v| v.nil? }
15
+
16
+ @data = {
17
+ description: find_description
18
+ }.delete_if { |_k, v| v.nil? }
19
+
17
20
  self
18
21
  rescue => e
19
- p e
22
+ record_error __method__, e.message
20
23
  return self
21
24
  end
22
25
 
26
+ def find_network_id
27
+ find_by_regex(/oid="(\d+)"/)
28
+ rescue => e
29
+ record_error __method__, e.message
30
+ return nil
31
+ end
32
+
33
+ def find_avatar
34
+ "https:#{find_by_regex(/guidedhelpid="profile_photo"><img src="([^"]+)"/)}"
35
+ rescue => e
36
+ record_error __method__, e.message
37
+ return nil
38
+ end
39
+
40
+ def find_display_name
41
+ find_by_regex(/og:title" content="([^"]+)"/).gsub(' - Google+', '')
42
+ rescue => e
43
+ record_error __method__, e.message
44
+ return nil
45
+ end
46
+
47
+ def find_username
48
+ "+#{find_by_regex(/&quot;https:\/\/plus.google.com\/\+(.+?)&quot;/)}"
49
+ rescue => e
50
+ record_error __method__, e.message
51
+ return nil
52
+ end
53
+
54
+ def find_description
55
+ find_by_regex(/name="Description" content="([^"]+)">/).encode('utf-8')
56
+ rescue => e
57
+ record_error __method__, e.message
58
+ return nil
59
+ end
60
+
61
+ def find_followers
62
+ if followers = find_by_regex(/">([^"]+)<\/span> followers</)
63
+ followers.tr(',', '').to_i
64
+ end
65
+ rescue => e
66
+ record_error __method__, e.message
67
+ return nil
68
+ end
69
+
70
+ def find_views
71
+ if views = find_by_regex(/">([^"]+)<\/span> views</)
72
+ views.tr(',', '').to_i
73
+ end
74
+ rescue => e
75
+ record_error __method__, e.message
76
+ return nil
77
+ end
78
+
23
79
  end
24
80
  end
25
81
  end
@@ -5,25 +5,96 @@ class IdsPlease
5
5
  class Instagram < IdsPlease::Grabbers::Base
6
6
 
7
7
  def grab_link
8
- @network_id = page_source.scan(/"user":{.+"id":"(\d+)"/).flatten.first
9
- @avatar = page_source.scan(/"user":{.+"profile_pic_url":"([^"]+)"/).flatten.first.gsub('\\', '')
10
- @display_name = page_source.scan(/"user":{.+"full_name":"([^"]+)"/).flatten.first
11
- @username = page_source.scan(/"user":{"username":"([^"]+)"/).flatten.first.gsub('\\', '')
12
- @data = {
13
- bio: page_source.scan(/"biography":"([^"]+)"/).flatten.first,
14
- website: page_source.scan(/"user":{.+"external_url":"([^"]+)"/).flatten.first.gsub('\\', ''),
15
- }
8
+ @network_id = find_network_id
9
+ @avatar = find_avatar
10
+ @display_name = find_display_name
11
+ @username = find_username
12
+
16
13
  @counts = {
17
- media: page_source.scan(/"media":{"count":(\d+)/).flatten.first.to_i,
18
- followed_by: page_source.scan(/"followed_by":{"count":(\d+)/).flatten.first.to_i,
19
- follows: page_source.scan(/"follows":{"count":(\d+)/).flatten.first.to_i,
20
- }
21
- @display_name = @display_name.gsub(/\\u([\da-fA-F]{4})/) {|m| [$1].pack("H*").unpack("n*").pack("U*")}
14
+ media: find_media,
15
+ followed_by: find_followed_by,
16
+ follows: find_follows
17
+ }.delete_if { |_k, v| v.nil? }
18
+
19
+ @data = {
20
+ bio: find_bio,
21
+ website: find_website
22
+ }.delete_if { |_k, v| v.nil? }
23
+
22
24
  self
23
25
  rescue => e
24
- p e
26
+ record_error __method__, e.message
25
27
  return self
26
28
  end
29
+
30
+ private
31
+
32
+ def find_network_id
33
+ find_by_regex(/"user":{.+"id":"(\d+)"/)
34
+ rescue => e
35
+ record_error __method__, e.message
36
+ return nil
37
+ end
38
+
39
+ def find_avatar
40
+ find_by_regex(/"user":{.+"profile_pic_url":"([^"]+)"/).gsub('\\', '')
41
+ rescue => e
42
+ record_error __method__, e.message
43
+ return nil
44
+ end
45
+
46
+ def find_display_name
47
+ _display_name = find_by_regex(/"user":{.+"full_name":"([^"]+)"/)
48
+ _display_name.gsub(/\\u([\da-fA-F]{4})/) { |_m|
49
+ [Regexp.last_match(1)].pack('H*').unpack('n*').pack('U*')
50
+ }
51
+ rescue => e
52
+ record_error __method__, e.message
53
+ return nil
54
+ end
55
+
56
+ def find_username
57
+ find_by_regex(/"user":{"username":"([^"]+)"/).gsub('\\', '')
58
+ rescue => e
59
+ record_error __method__, e.message
60
+ return nil
61
+ end
62
+
63
+ def find_bio
64
+ CGI.unescapeHTML(find_by_regex(/"biography":"([^"]+)"/)).strip
65
+ rescue => e
66
+ record_error __method__, e.message
67
+ return nil
68
+ end
69
+
70
+ def find_website
71
+ CGI.unescapeHTML(find_by_regex(/"user":{.+"external_url":"([^"]+)"/).gsub('\\', '')).strip
72
+ rescue => e
73
+ record_error __method__, e.message
74
+ return nil
75
+ end
76
+
77
+ def find_media
78
+ find_by_regex(/"media":{"count":(\d+)/).to_i
79
+ rescue => e
80
+ record_error __method__, e.message
81
+ return nil
82
+ end
83
+
84
+ def find_followed_by
85
+ find_by_regex(/"followed_by":{"count":(\d+)/).to_i
86
+ rescue => e
87
+ record_error __method__, e.message
88
+ return nil
89
+ end
90
+
91
+ def find_follows
92
+ find_by_regex(/"follows":{"count":(\d+)/).to_i
93
+ rescue => e
94
+ record_error __method__, e.message
95
+ return nil
96
+ end
97
+
27
98
  end
28
99
  end
29
100
  end
@@ -9,17 +9,17 @@ class IdsPlease
9
9
  uid_url = "http://appsmail.ru/platform/#{link.split('/')[-2..-1].join('/')}"
10
10
  @network_id = JSON.parse(open(uid_url).read)['uid']
11
11
  @username, type = get_name_and_type(link)
12
- @avatar = page_source.scan(/profile__avatar" src="([^"]+)/).flatten.first
13
- @display_name = page_source.scan(/h1.+title="([^"]+)/).flatten.first
12
+ @avatar = find_by_regex(/profile__avatar" src="([^"]+)/)
13
+ @display_name = find_by_regex(/h1.+title="([^"]+)/)
14
14
  @display_name = CGI.unescapeHTML(@display_name) if @display_name
15
15
  @data = {
16
16
  type: type,
17
- description: page_source.scan(/profile__content_mainInfo" title="([^"]+)/).flatten.first
17
+ description: find_by_regex(/profile__content_mainInfo" title="([^"]+)/)
18
18
  }
19
19
  @data[:description] = CGI.unescapeHTML(@data[:description]) if @data[:description]
20
20
  self
21
21
  rescue => e
22
- p e
22
+ record_error __method__, e.message
23
23
  return self
24
24
  end
25
25
 
@@ -3,32 +3,118 @@ class IdsPlease
3
3
  class Twitter < IdsPlease::Grabbers::Base
4
4
 
5
5
  def grab_link
6
- @network_id = page_source.scan(/data-user-id="(\d+)"/).flatten.first
7
- @avatar = page_source.scan(/ProfileAvatar-image " src="([^"]+)"/).flatten.first
8
- @display_name = page_source.scan(/ProfileHeaderCard-nameLink[^>]+>([^<]+)</).flatten.first
9
- @username = page_source.scan(/<title>[^\(]+\(@([^\)]+)\)/).flatten.first
10
- @data = {}
11
- {
12
- description: page_source.scan(/ProfileHeaderCard-bio[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
13
- location: page_source.scan(/ProfileHeaderCard-locationText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
14
- join_date: page_source.scan(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
15
- }.each do |k, v|
16
- next if v.nil? || v == ''
17
- @data[k] = CGI.unescapeHTML(v).strip
18
- end
6
+ @network_id = find_network_id
7
+ @avatar = find_avatar
8
+ @display_name = find_display_name
9
+ @username = find_username
10
+
19
11
  @counts = {
20
- tweets: page_source.scan(/statuses_count&quot;:(\d+),&quot;/).flatten.first.to_i,
21
- following: page_source.scan(/friends_count&quot;:(\d+),&quot;/).flatten.first.to_i,
22
- followers: page_source.scan(/followers_count&quot;:(\d+),&quot;/).flatten.first.to_i,
23
- favorites: page_source.scan(/favourites_count&quot;:(\d+),&quot;/).flatten.first.to_i,
24
- lists: page_source.scan(/listed_count&quot;:(\d+),&quot;/).flatten.first.to_i,
25
- }
12
+ tweets: find_tweets,
13
+ following: find_following,
14
+ followers: find_followers,
15
+ favorites: find_favorites,
16
+ lists: find_listed
17
+ }.delete_if { |_k, v| v.nil? }
18
+
19
+ @data = {
20
+ description: find_description,
21
+ location: find_location,
22
+ join_date: find_join_date
23
+ }.delete_if { |_k, v| v.nil? }
24
+
26
25
  self
27
26
  rescue => e
28
- p e
27
+ record_error __method__, e.message
29
28
  return self
30
29
  end
31
30
 
31
+ def find_network_id
32
+ find_by_regex(/data-user-id="(\d+)"/)
33
+ rescue => e
34
+ record_error __method__, e.message
35
+ return nil
36
+ end
37
+
38
+ def find_avatar
39
+ find_by_regex(/ProfileAvatar-image " src="([^"]+)"/)
40
+ rescue => e
41
+ record_error __method__, e.message
42
+ return nil
43
+ end
44
+
45
+ def find_display_name
46
+ find_by_regex(/ProfileHeaderCard-nameLink[^>]+>([^<]+)</)
47
+ rescue => e
48
+ record_error __method__, e.message
49
+ return nil
50
+ end
51
+
52
+ def find_username
53
+ find_by_regex(/<title>[^\(]+\(@([^\)]+)\)/)
54
+ rescue => e
55
+ record_error __method__, e.message
56
+ return nil
57
+ end
58
+
59
+ def find_description
60
+ _desc = find_by_regex(/ProfileHeaderCard-bio[^>]+>([^<]+)</)
61
+ CGI.unescapeHTML(_desc.encode('utf-8')).strip
62
+ rescue => e
63
+ record_error __method__, e.message
64
+ return nil
65
+ end
66
+
67
+ def find_location
68
+ _loc = find_by_regex(/ProfileHeaderCard-locationText[^>]+>([^<]+)</)
69
+ CGI.unescapeHTML(_loc.encode('utf-8')).strip
70
+ rescue => e
71
+ record_error __method__, e.message
72
+ return nil
73
+ end
74
+
75
+ def find_join_date
76
+ _date = find_by_regex(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</)
77
+ CGI.unescapeHTML(_date.encode('utf-8')).strip
78
+ rescue => e
79
+ record_error __method__, e.message
80
+ return nil
81
+ end
82
+
83
+ def find_tweets
84
+ find_by_regex(/statuses_count&quot;:(\d+),&quot;/).to_i
85
+ rescue => e
86
+ record_error __method__, e.message
87
+ return nil
88
+ end
89
+
90
+ def find_followers
91
+ find_by_regex(/followers_count&quot;:(\d+),&quot;/).to_i
92
+ rescue => e
93
+ record_error __method__, e.message
94
+ return nil
95
+ end
96
+
97
+ def find_following
98
+ find_by_regex(/friends_count&quot;:(\d+),&quot;/).to_i
99
+ rescue => e
100
+ record_error __method__, e.message
101
+ return nil
102
+ end
103
+
104
+ def find_favorites
105
+ find_by_regex(/favourites_count&quot;:(\d+),&quot;/).to_i
106
+ rescue => e
107
+ record_error __method__, e.message
108
+ return nil
109
+ end
110
+
111
+ def find_listed
112
+ find_by_regex(/listed_count&quot;:(\d+),&quot;/).to_i
113
+ rescue => e
114
+ record_error __method__, e.message
115
+ return nil
116
+ end
117
+
32
118
  end
33
119
  end
34
120
  end
@@ -1,26 +1,24 @@
1
1
  class IdsPlease
2
2
  module Grabbers
3
3
  class Vkontakte < IdsPlease::Grabbers::Base
4
-
5
4
  def grab_link
6
5
  agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36'
7
6
  @page_source ||= open(link, 'User-Agent' => agent).read.encode('utf-8')
8
- @network_id = page_source.scan(/href="\/wall(-\d+)_/).flatten.first
7
+ @network_id = find_by_regex(/href="\/wall(-\d+)_/)
9
8
  @username = @link.to_s.split('vk.com/').last.gsub('/', '')
10
- @avatar = page_source.scan(/page_avatar.+\n.+src="([^"]+)/).flatten.first
9
+ @avatar = find_by_regex(/page_avatar.+\n.+src="([^"]+)/)
11
10
  @avatar = CGI.unescapeHTML(@avatar) if @avatar
12
- @display_name = page_source.scan(/page_name">([^<]+)/).flatten.first
11
+ @display_name = find_by_regex(/page_name">([^<]+)/)
13
12
  @display_name = CGI.unescapeHTML(@display_name) if @display_name
14
13
  @data = {
15
- description: page_source.scan(/description" content="([^"]+)/).flatten.first
14
+ description: find_by_regex(/description" content="([^"]+)/)
16
15
  }
17
16
  @data[:description] = CGI.unescapeHTML(@data[:description]) if @data[:description]
18
17
  self
19
18
  rescue => e
20
- p e
19
+ record_error __method__, e.message
21
20
  return self
22
21
  end
23
-
24
22
  end
25
23
  end
26
24
  end
data/lib/ids_please/grabbers.rb CHANGED
@@ -8,14 +8,14 @@ require_relative 'grabbers/google_plus'
8
8
 
9
9
  class IdsPlease
10
10
  module Grabbers
11
-
11
+
12
12
  NETWORKS = {
13
13
  facebook: IdsPlease::Grabbers::Facebook,
14
14
  vkontakte: IdsPlease::Grabbers::Vkontakte,
15
15
  twitter: IdsPlease::Grabbers::Twitter,
16
16
  instagram: IdsPlease::Grabbers::Instagram,
17
17
  mailru: IdsPlease::Grabbers::Mailru,
18
- google_plus: IdsPlease::Grabbers::GooglePlus,
18
+ google_plus: IdsPlease::Grabbers::GooglePlus
19
19
  }
20
20
 
21
21
  def self.each
@@ -25,6 +25,6 @@ class IdsPlease
25
25
  def self.by_symbol(sym)
26
26
  NETWORKS[sym]
27
27
  end
28
-
28
+
29
29
  end
30
30
  end
@@ -1,10 +1,10 @@
1
1
  class IdsPlease
2
2
  module Parsers
3
3
  class Base
4
-
5
4
  class << self
5
+
6
6
  def to_sym
7
- self.name.split('::').last.downcase.to_sym
7
+ name.split('::').last.downcase.to_sym
8
8
  end
9
9
 
10
10
  def interact(links)
@@ -15,8 +15,6 @@ class IdsPlease
15
15
  end.compact
16
16
  end
17
17
 
18
- private
19
-
20
18
  def parse_link(link)
21
19
  link.path.split('/')[1]
22
20
  end
@@ -24,8 +22,8 @@ class IdsPlease
24
22
  def valid_id_regex
25
23
  /\A([\w\.\+-]{2,})/
26
24
  end
27
- end
28
25
 
26
+ end
29
27
  end
30
28
  end
31
29
  end
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /fb\.me|fb\.com|facebook/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  query = CGI.parse(link.query) if link.query && !link.query.empty?
12
10
 
@@ -13,8 +13,6 @@ class IdsPlease
13
13
  links.map { |l| parse_link(l) }.compact
14
14
  end
15
15
 
16
- private
17
-
18
16
  def parse_link(link)
19
17
  if matched = link.path.match(/\/(\+\w+)/)
20
18
  matched[1]
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /hi5/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  query = CGI.parse(link.query) if link.query && !link.query.empty?
12
10
 
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /linkedin/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  query = CGI.parse(link.query) if link.query && !link.query.empty?
12
10
 
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /livejournal/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  parsed = link.host.sub('.livejournal.com', '')
12
10
  parsed = link.host.split('.livejournal').first if parsed == link.host
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /mail\.ru/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  id = link.path.split('/')[2]
12
10
  id.split('?').first.split('#').first
@@ -12,8 +12,6 @@ class IdsPlease
12
12
  end.compact
13
13
  end
14
14
 
15
- private
16
-
17
15
  def parse_link(link)
18
16
  link.host.sub('.moikrug.ru', '')
19
17
  end
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /odnoklassniki|ok\.ru/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  if matched = link.path.match(/\/(\d{2,})/)
12
10
  matched[1]
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /reddit/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  link.path.split('/')[2]
12
10
  end
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /tumblr/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  return if link.host.sub('.tumblr.com', '') == link.host
12
10
  link.host.sub('.tumblr.com', '')
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /twitter/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  if link.path =~ /%23!/
12
10
  id = link.path.sub(/\A\/%23!\//, '')
@@ -9,8 +9,6 @@ class IdsPlease
9
9
  links.map { |l| parse_link(l) }.compact
10
10
  end
11
11
 
12
- private
13
-
14
12
  def parse_link(link)
15
13
  if link.path =~ /id|club|public/
16
14
  id = link.path.sub(/\A\/id|\A\/club|\A\/public/, '')
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /youtu\.be|youtube/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  if link.path =~ /channels|user/
12
10
  link.path.split('/')[2]
@@ -44,13 +44,13 @@ class IdsPlease
44
44
  mailru: IdsPlease::Parsers::Mailru
45
45
  }
46
46
 
47
- def self.each
48
- NETWORKS.values.each { |n| yield n }
49
- end
50
-
51
47
  def self.by_symbol(sym)
52
48
  NETWORKS[sym]
53
49
  end
54
50
 
51
+ def self.to_a
52
+ NETWORKS.values
53
+ end
54
+
55
55
  end
56
56
  end
data/lib/ids_please/version.rb CHANGED
@@ -1,3 +1,5 @@
1
1
  class IdsPlease
2
- VERSION = '2.2.0'
2
+
3
+ VERSION = '2.2.1'
4
+
3
5
  end
data/lib/ids_please.rb CHANGED
@@ -26,7 +26,7 @@ class IdsPlease
26
26
  end
27
27
 
28
28
  def recognized
29
- Hash[@recognized.map { |parser, links| [ parser.to_sym, links ] }]
29
+ Hash[@recognized.map { |parser, links| [parser.to_sym, links] }]
30
30
  end
31
31
 
32
32
  def parse
@@ -39,22 +39,22 @@ class IdsPlease
39
39
 
40
40
  private
41
41
 
42
- def interact(interactors = :parsers)
42
+ def interact(interactors_group = :parsers)
43
43
  recognize
44
44
  interacted = Hash.new { |hash, network| hash[network] = [] }
45
45
  @recognized.each do |network, links|
46
- interactor = IdsPlease.send(interactors).by_symbol(network)
46
+ interactor = IdsPlease.send(interactors_group).by_symbol(network)
47
47
  interacted[network].concat interactor.interact(links)
48
48
  end
49
- self.instance_variable_set(interacted_var(interactors), interacted)
49
+ instance_variable_set(interacted_var(interactors_group), interacted)
50
50
 
51
51
  interacted
52
52
  end
53
53
 
54
- def interacted_var(interactors)
55
- if interactors == :parsers
54
+ def interacted_var(interactors_group)
55
+ if interactors_group == :parsers
56
56
  :@parsed
57
- elsif interactors == :grabbers
57
+ elsif interactors_group == :grabbers
58
58
  :@grabbed
59
59
  else
60
60
  throw 'Wrong interactors type'
@@ -64,14 +64,14 @@ class IdsPlease
64
64
  def recognize_link(link)
65
65
  link = "http://#{link}" unless link =~ /\Ahttps?:\/\//
66
66
  parsed_link = URI(URI.encode(link))
67
- IdsPlease::Parsers.each do |network|
68
- if parsed_link.host =~ network::MASK
69
- @recognized[network.to_sym] ||= []
70
- @recognized[network.to_sym] << parsed_link
71
- return
72
- end
67
+
68
+ network = IdsPlease::Parsers.to_a.find { |n| parsed_link.host =~ n::MASK }
69
+
70
+ if network
71
+ @recognized[network.to_sym] ||= []
72
+ @recognized[network.to_sym] << parsed_link
73
+ else
74
+ @unrecognized << link
73
75
  end
74
- unrecognized << link
75
76
  end
76
-
77
77
  end
@@ -1,51 +1,49 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe IdsPlease do
4
-
5
4
  recognazible_links = %w(
6
- https://www.facebook.com/fb_acc
7
- https://facebook.com/fb_acc2<U+200>
8
- http://instagram.com/inst_acc
9
- http://hi5.com/hi5_acc
10
- http://www.hi5.com/profile.html?uid=12341234
11
- http://pinterest.com/pinterest_acc
12
- http://blogger-acc.blogspot.com
13
- http://livejournal-acc.livejournal.com
14
- http://livejournal-acc2.livejournal.ru
15
- https://www.blogger.com/blogger.g?blogID=12341234#overview/src=dashboard
16
- http://vk.com/vk_acc
17
- http://linkedin.com/in/xnutsive
18
- http://www.linkedin.com/company/evil-martians
19
- http://www.linkedin.com/profile/view?id=12341234&trk=nav_responsive_tab_profile
20
- http://Ameblo.jp/ameba_acc
21
- http://reddit.com/user/reddit_acc
22
- https://twitter.com/twi_acc
23
- https://vimeo.com/vimeo_acc
24
- https://plus.google.com/12341234
25
- https://plus.google.com/+VladimirBokov
26
- https://soundcloud.com/sc_acc
27
- https://youtube.com/channels/yb_acc
28
- http://tumblr-acc.tumblr.com
29
- http://odnoklassniki.com/profile/12341234/about
30
- http://ok.ru/profile/12341234/about
31
- http://odnoklassniki.com/group/43214321/about?some=123
32
- http://moikrug-acc.moikrug.ru
33
- https://my.mail.ru/community/test-group-102/
34
- https://my.mail.ru/mail/gazay/
35
- )
5
+ https://www.facebook.com/fb_acc
6
+ https://facebook.com/fb_acc2<U+200>
7
+ http://instagram.com/inst_acc
8
+ http://hi5.com/hi5_acc
9
+ http://www.hi5.com/profile.html?uid=12341234
10
+ http://pinterest.com/pinterest_acc
11
+ http://blogger-acc.blogspot.com
12
+ http://livejournal-acc.livejournal.com
13
+ http://livejournal-acc2.livejournal.ru
14
+ https://www.blogger.com/blogger.g?blogID=12341234#overview/src=dashboard
15
+ http://vk.com/vk_acc
16
+ http://linkedin.com/in/xnutsive
17
+ http://www.linkedin.com/company/evil-martians
18
+ http://www.linkedin.com/profile/view?id=12341234&trk=nav_responsive_tab_profile
19
+ http://Ameblo.jp/ameba_acc
20
+ http://reddit.com/user/reddit_acc
21
+ https://twitter.com/twi_acc
22
+ https://vimeo.com/vimeo_acc
23
+ https://plus.google.com/12341234
24
+ https://plus.google.com/+VladimirBokov
25
+ https://soundcloud.com/sc_acc
26
+ https://youtube.com/channels/yb_acc
27
+ http://tumblr-acc.tumblr.com
28
+ http://odnoklassniki.com/profile/12341234/about
29
+ http://ok.ru/profile/12341234/about
30
+ http://odnoklassniki.com/group/43214321/about?some=123
31
+ http://moikrug-acc.moikrug.ru
32
+ https://my.mail.ru/community/test-group-102/
33
+ https://my.mail.ru/mail/gazay/
34
+ )
36
35
 
37
36
  not_recognazible_links = %w(
38
- http://fucebook.com/not_recognized
39
- http://vka.com/not_recognized
40
- )
37
+ http://fucebook.com/not_recognized
38
+ http://vka.com/not_recognized
39
+ )
41
40
 
42
41
  not_parseble_links = %w(
43
- http://vk.com
44
- http://soundcloud.com
45
- )
42
+ http://vk.com
43
+ http://soundcloud.com
44
+ )
46
45
 
47
46
  describe 'recognize' do
48
-
49
47
  it 'not recognizes wrong links' do
50
48
  recognizer = IdsPlease.new(*not_recognazible_links)
51
49
  recognizer.recognize
@@ -138,13 +136,10 @@ describe IdsPlease do
138
136
  it 'recognizes mailru links' do
139
137
  expect(@recognizer.recognized[:mailru].count).to eq(2)
140
138
  end
141
-
142
139
  end
143
-
144
140
  end
145
141
 
146
142
  describe 'parse' do
147
-
148
143
  it 'not parse wrong links' do
149
144
  @recognizer = IdsPlease.new(*not_parseble_links)
150
145
  @recognizer.parse
@@ -170,7 +165,7 @@ describe IdsPlease do
170
165
  end
171
166
 
172
167
  it 'get right id from facebook link' do
173
- expect(@recognizer.parsed[:facebook]).to eq(['fb_acc', 'fb_acc2'])
168
+ expect(@recognizer.parsed[:facebook]).to eq(%w(fb_acc fb_acc2))
174
169
  end
175
170
 
176
171
  it 'get right id from linkedin link' do
@@ -210,7 +205,7 @@ describe IdsPlease do
210
205
  end
211
206
 
212
207
  it 'get right id from hi5 link' do
213
- expect(@recognizer.parsed[:hi5]).to eq(['hi5_acc', '12341234'])
208
+ expect(@recognizer.parsed[:hi5]).to eq(%w(hi5_acc 12341234))
214
209
  end
215
210
 
216
211
  it 'get right id from soundcloud link' do
@@ -234,14 +229,12 @@ describe IdsPlease do
234
229
  end
235
230
 
236
231
  it 'get right id from odnoklassniki link' do
237
- expect(@recognizer.parsed[:odnoklassniki].sort).to eq(['12341234', '43214321', '12341234'].sort)
232
+ expect(@recognizer.parsed[:odnoklassniki].sort).to eq(%w(12341234 43214321 12341234).sort)
238
233
  end
239
234
 
240
235
  it 'get right id from moikrug link' do
241
236
  expect(@recognizer.parsed[:moikrug].first).to eq('moikrug-acc')
242
237
  end
243
-
244
238
  end
245
239
  end
246
-
247
240
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ids_please
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - gazay
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-17 00:00:00.000000000 Z
11
+ date: 2016-04-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -40,7 +40,8 @@ dependencies:
40
40
  version: '0'
41
41
  description: Helps to get ids or screen names from links to social network accounts
42
42
  email: alex.gaziev@gmail.com
43
- executables: []
43
+ executables:
44
+ - ids_please
44
45
  extensions: []
45
46
  extra_rdoc_files:
46
47
  - LICENSE
@@ -51,8 +52,10 @@ files:
51
52
  - LICENSE
52
53
  - README.md
53
54
  - Rakefile
55
+ - bin/ids_please
54
56
  - ids_please.gemspec
55
57
  - lib/ids_please.rb
58
+ - lib/ids_please/cli.rb
56
59
  - lib/ids_please/grabbers.rb
57
60
  - lib/ids_please/grabbers/base.rb
58
61
  - lib/ids_please/grabbers/facebook.rb
@@ -106,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
106
109
  version: '0'
107
110
  requirements: []
108
111
  rubyforge_project:
109
- rubygems_version: 2.4.5
112
+ rubygems_version: 2.4.5.1
110
113
  signing_key:
111
114
  specification_version: 4
112
115
  summary: Helps to get ids or screen names from links to social network accounts