ids_please 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cbbb7fdc96da69f11a60cfbda149c1efc18cf3ec
4
- data.tar.gz: a32ec832041c194f72d78aae0f410688edbd3239
3
+ metadata.gz: 728f2add26771adaaedbb2afb42fa59126e19a10
4
+ data.tar.gz: 56285e208b28c26303362a8070b3e538eecac37a
5
5
  SHA512:
6
- metadata.gz: e903c0a4695122ade18201fe6f33035644c2193cd13e1f950426da6ba98de85ccb4953f6aab4069288652b202441ed4992b09cd51e13ea34d544e8afe5060ed8
7
- data.tar.gz: 94ef374b9075560bad7f53dd829d98a499f3b84d923de4392df9f77a07afeb628a723ef8e977b9fcde2323c94084da47d726e14791c08f3d81997d64905475da
6
+ metadata.gz: a26e536a30e7461fd9396df7d1db9806affb1c9212fdc4850eb853588a706b705c2b16c5f012cff8c7d04be30e88408ffbcea8f28967109d1ef7f81c1995eaec
7
+ data.tar.gz: d051a8a184798fc5d080f65db17bc78ce265e6030748923c3a6e6f0599154342ec1ec1de626ddb077c934c086015ea59d8d0b7d68c626c53113f26306e70a75d
data/Rakefile CHANGED
@@ -4,9 +4,9 @@ require 'bundler'
4
4
  Bundler::GemHelper.install_tasks
5
5
 
6
6
  desc 'Run all tests by default'
7
- task :default => :spec
7
+ task default: :spec
8
8
 
9
9
  require 'rspec/core/rake_task'
10
10
  RSpec::Core::RakeTask.new do |t|
11
- t.rspec_opts = ["--color", '--format doc']
12
- end
11
+ t.rspec_opts = ['--color', '--format doc']
12
+ end
data/bin/ids_please ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ ids_please_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ $LOAD_PATH.unshift(ids_please_dir) unless $LOAD_PATH.include?(ids_please_dir)
4
+ require 'ids_please'
5
+ require 'ids_please/cli'
6
+
7
+ IdsPlease::CLI.run(ARGV)
data/ids_please.gemspec CHANGED
@@ -4,17 +4,19 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'ids_please/version'
5
5
 
6
6
  Gem::Specification.new do |s|
7
- s.name = "ids_please"
7
+ s.name = 'ids_please'
8
8
  s.version = IdsPlease::VERSION
9
- s.authors = ["gazay"]
10
- s.description = %q{Helps to get ids or screen names from links to social network accounts}
11
- s.summary = %q{Helps to get ids or screen names from links to social network accounts}
9
+ s.authors = ['gazay']
10
+ s.description = 'Helps to get ids or screen names from links to social network accounts'
11
+ s.summary = 'Helps to get ids or screen names from links to social network accounts'
12
12
  s.licenses = ['MIT']
13
- s.email = "alex.gaziev@gmail.com"
14
- s.extra_rdoc_files = ["LICENSE"]
15
- s.rdoc_options = ["--charset=UTF-8"]
16
- s.homepage = "http://github.com/gazay/ids_please"
17
- s.require_paths = ["lib"]
13
+ s.email = 'alex.gaziev@gmail.com'
14
+ s.extra_rdoc_files = ['LICENSE']
15
+ s.rdoc_options = ['--charset=UTF-8']
16
+ s.homepage = 'http://github.com/gazay/ids_please'
17
+ s.require_paths = ['lib']
18
+ s.bindir = 'bin'
19
+ s.executables = 'ids_please'
18
20
  s.files = `git ls-files`.split("\n")
19
21
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
22
  s.add_development_dependency 'rake'
@@ -0,0 +1,112 @@
1
+ class IdsPlease
2
+ module CLI
3
+ def self.run(args)
4
+ command = args.shift
5
+ case command
6
+ when 'grab', 'parse', 'recognize'
7
+ when 'help', nil
8
+ help
9
+ exit
10
+ else
11
+ abort "Unknown command. Enter 'ids_please help' for instructions"
12
+ end
13
+
14
+ links = args
15
+ if links.empty?
16
+ abort "You didn't enter any links. Enter 'ids_please help' for instructions"
17
+ end
18
+
19
+ ids = IdsPlease.new(*links)
20
+ case command
21
+ when 'grab'
22
+ grab(ids)
23
+ when 'parse'
24
+ parse(ids)
25
+ when 'recognize'
26
+ recognize(ids)
27
+ end
28
+ end
29
+
30
+ module_function
31
+
32
+ def grab(ids)
33
+ ids.grab
34
+ ids.grabbed.each do |social_network, grabbers_array|
35
+ puts social_network.to_s.capitalize + ': '
36
+ grabbers_array.each do |grabber|
37
+ grabber.to_h.each do |property, value|
38
+ unless value.nil? || value.to_s.empty? || property == :page_source
39
+
40
+ if value.class == Hash
41
+ value.delete_if { |_, v| v.nil? }
42
+ unless value.empty?
43
+ puts " #{property}: "
44
+ value.each do |k, v|
45
+ puts " #{k}: #{v}"
46
+ end
47
+ end
48
+ else
49
+ puts " #{property}: #{value}"
50
+ end
51
+
52
+ end
53
+ end
54
+ puts "\n" unless grabbers_array.last == grabber
55
+ end
56
+ puts "\n" unless ids.grabbed.to_a.last[0] == social_network
57
+ end
58
+ end
59
+
60
+ def parse(ids)
61
+ ids.parse
62
+ ids.parsed.each do |social_network, permalinks_array|
63
+ puts social_network.to_s.capitalize + ': '
64
+ permalinks_array.each do |permalink|
65
+ puts " #{permalink}"
66
+ end
67
+ puts "\n" unless ids.parsed.to_a.last[0] == social_network
68
+ end
69
+ end
70
+
71
+ def recognize(ids)
72
+ ids.recognize
73
+ unless ids.recognized.empty?
74
+ puts 'Recognized:'
75
+ ids.recognized.each do |social_network, urls_array|
76
+ puts " #{social_network.to_s.capitalize}: "
77
+ urls_array.each do |url|
78
+ puts " #{url}"
79
+ end
80
+ puts "\n"
81
+ end
82
+ end
83
+
84
+ unless ids.unrecognized.empty?
85
+ puts 'Unrecognized:'
86
+ ids.unrecognized.each do |url|
87
+ puts " #{url}"
88
+ end
89
+ end
90
+ end
91
+
92
+ def help
93
+ puts <<-HELP.gsub(/^ {8}/, '')
94
+ IDs, please
95
+ Grab some hidden in html data from social account page
96
+ Get social network IDs or screen names from links to social network accounts
97
+
98
+ Usage:
99
+ ids_please command [links]
100
+
101
+ Available commands:
102
+ grab grab some hidden in html data from social account page (avatar, username, id...)
103
+ parse get screen names from links to social network accounts
104
+ recognize check that the link is for a known social network
105
+
106
+ Examples:
107
+ ids_please grab https://instagram.com/microsoft
108
+ ids_please parse https://facebook.com/Microsoft https://instagram.com/microsoft
109
+ HELP
110
+ end
111
+ end
112
+ end
@@ -8,13 +8,20 @@ class IdsPlease
8
8
  links.map { |l| self.new(l).grab_link }
9
9
  end
10
10
 
11
- attr_reader :avatar, :display_name, :username, :link, :page_source, :network_id, :data, :counts
11
+ attr_reader :avatar,
12
+ :display_name,
13
+ :username,
14
+ :link,
15
+ :page_source,
16
+ :network_id,
17
+ :data,
18
+ :counts
12
19
 
13
20
  def initialize(link)
14
21
  @link = link
15
22
  end
16
23
 
17
- def grab_link(link)
24
+ def grab_link(_link)
18
25
  throw 'Base grabber can not grab anything'
19
26
  end
20
27
 
@@ -48,6 +55,19 @@ class IdsPlease
48
55
  def page_source
49
56
  @page_source ||= open(link).read
50
57
  end
58
+
59
+ def errors
60
+ @errors ||= []
61
+ end
62
+
63
+ def record_error(event, message)
64
+ errors << "#{event} has #{message}"
65
+ end
66
+
67
+ def find_by_regex(reg)
68
+ page_source.scan(reg).flatten.first
69
+ end
70
+
51
71
  end
52
72
  end
53
73
  end
@@ -3,43 +3,96 @@ class IdsPlease
3
3
  class Facebook < IdsPlease::Grabbers::Base
4
4
 
5
5
  def grab_link
6
- @network_id = page_source.scan(/entity_id":"(\d+)"/).flatten.first
7
- @avatar = page_source.scan(/og:image" content="([^"]+)"/).flatten.first
8
- @display_name = page_source.scan(/og:title" content="([^"]+)"/).flatten.first
9
- @username = page_source.scan(/og:url" content="[^"]+\/([^\/"]+)"/).flatten.first
10
- @avatar = CGI.unescapeHTML(@avatar.encode('utf-8')) if @avatar
11
- @display_name = CGI.unescapeHTML(@display_name.encode('utf-8')) if @display_name
12
- @data = {}
13
- {
14
- type: page_source.scan(/og:type" content="([^"]+)"/).flatten.first.to_s.encode('utf-8'),
15
- description: page_source.scan(/og:description" content="([^"]+)"/).flatten.first.to_s.encode('utf-8'),
16
- }.each do |k, v|
17
- next if v.nil? || v == ''
18
- @data[k] = CGI.unescapeHTML(v).strip
19
- end
6
+ @network_id = find_network_id
7
+ @avatar = find_avatar
8
+ @display_name = find_display_name
9
+ @username = find_username
10
+
20
11
  @counts = {
21
- likes: likes,
22
- visits: visits,
23
- }.delete_if {|k,v| v.nil? }
12
+ likes: find_likes,
13
+ visits: find_visits
14
+ }.delete_if { |_k, v| v.nil? }
15
+
16
+ @data = {
17
+ type: find_type,
18
+ description: find_description
19
+ }.delete_if { |_k, v| v.nil? }
20
+
24
21
  self
25
22
  rescue => e
26
- p e
23
+ record_error __method__, e.message
27
24
  return self
28
25
  end
29
26
 
30
- def likes
31
- page_source.scan(/>([^"]+) <span class=".+">likes/).flatten.first.to_s.tr(',','').to_i
27
+ private
28
+
29
+ def find_network_id
30
+ find_by_regex(/entity_id":"(\d+)"/)
31
+ rescue => e
32
+ record_error __method__, e.message
33
+ return nil
34
+ end
35
+
36
+ def find_avatar
37
+ CGI.unescapeHTML(
38
+ find_by_regex(/profilePic\simg"\salt=[^=]+="([^"]+)/).encode('utf-8')
39
+ )
40
+ rescue => e
41
+ record_error __method__, e.message
42
+ return nil
43
+ end
44
+
45
+ def find_display_name
46
+ CGI.unescapeHTML(
47
+ find_by_regex(/pageTitle">([^<\|]+)/).strip.encode('utf-8')
48
+ )
49
+ rescue => e
50
+ record_error __method__, e.message
51
+ return nil
52
+ end
53
+
54
+ def find_username
55
+ find_by_regex(/link\srel="canonical"\shref="https:\/\/facebook\.com\/([^"]+)/) ||
56
+ find_by_regex(/;\sURL=\/([^\/\?]+)/)
57
+ rescue => e
58
+ record_error __method__, e.message
59
+ return nil
60
+ end
61
+
62
+ def find_type
63
+ find_by_regex(/type":"Person/) ? 'perosnal' : 'group'
64
+ rescue => e
65
+ record_error __method__, e.message
66
+ return nil
67
+ end
68
+
69
+ def find_description
70
+ CGI.unescapeHTML(
71
+ find_by_regex(/name="description" content="([^"]+)"/).encode('utf-8')
72
+ ).strip
73
+ rescue => e
74
+ record_error __method__, e.message
75
+ return nil
76
+ end
77
+
78
+ def find_likes
79
+ if likes = find_by_regex(/>([^"]+) <span class=".+">likes/)
80
+ likes.tr(',', '').to_i
81
+ end
32
82
  rescue => e
33
- p e
83
+ record_error __method__, e.message
34
84
  return nil
35
85
  end
36
86
 
37
- def visits
38
- page_source.scan(/likes.+>([^"]+)<\/span> <span class=".+">visits/).flatten.first.to_s.tr(',','').to_i
87
+ def find_visits
88
+ if visits = find_by_regex(/likes.+>([^"]+)<\/span> <span class=".+">visits/)
89
+ visits.tr(',', '').to_i
90
+ end
39
91
  rescue => e
40
- p e
92
+ record_error __method__, e.message
41
93
  return nil
42
94
  end
95
+
43
96
  end
44
97
  end
45
98
  end
@@ -3,23 +3,79 @@ class IdsPlease
3
3
  class GooglePlus < IdsPlease::Grabbers::Base
4
4
 
5
5
  def grab_link
6
- @network_id = page_source.scan(/data-oid="(\d+)"/).flatten.first
7
- @avatar = 'https:' + page_source.scan(/guidedhelpid="profile_photo"><img src="([^"]+)"/).flatten.first
8
- @display_name = page_source.scan(/og:title" content="([^"]+)"/).flatten.first.gsub(' - Google+','')
9
- @username = '+' + page_source.scan(/&quot;https:\/\/plus.google.com\/\+(.+?)&quot;/).flatten.first
10
- @data = {
11
- description: page_source.scan(/name="Description" content="([^"]+)">/).flatten.first.to_s.encode('utf-8')
12
- }
6
+ @network_id = find_network_id
7
+ @avatar = find_avatar
8
+ @display_name = find_display_name
9
+ @username = find_username
10
+
13
11
  @counts = {
14
- followers: page_source.scan(/">([^"]+)<\/span> followers</).flatten.first.to_s.tr(',','').to_i,
15
- views: page_source.scan(/">([^"]+)<\/span> views</).flatten.first.to_s.tr(',','').to_i,
16
- }
12
+ followers: find_followers,
13
+ views: find_views
14
+ }.delete_if { |_k, v| v.nil? }
15
+
16
+ @data = {
17
+ description: find_description
18
+ }.delete_if { |_k, v| v.nil? }
19
+
17
20
  self
18
21
  rescue => e
19
- p e
22
+ record_error __method__, e.message
20
23
  return self
21
24
  end
22
25
 
26
+ def find_network_id
27
+ find_by_regex(/oid="(\d+)"/)
28
+ rescue => e
29
+ record_error __method__, e.message
30
+ return nil
31
+ end
32
+
33
+ def find_avatar
34
+ "https:#{find_by_regex(/guidedhelpid="profile_photo"><img src="([^"]+)"/)}"
35
+ rescue => e
36
+ record_error __method__, e.message
37
+ return nil
38
+ end
39
+
40
+ def find_display_name
41
+ find_by_regex(/og:title" content="([^"]+)"/).gsub(' - Google+', '')
42
+ rescue => e
43
+ record_error __method__, e.message
44
+ return nil
45
+ end
46
+
47
+ def find_username
48
+ "+#{find_by_regex(/&quot;https:\/\/plus.google.com\/\+(.+?)&quot;/)}"
49
+ rescue => e
50
+ record_error __method__, e.message
51
+ return nil
52
+ end
53
+
54
+ def find_description
55
+ find_by_regex(/name="Description" content="([^"]+)">/).encode('utf-8')
56
+ rescue => e
57
+ record_error __method__, e.message
58
+ return nil
59
+ end
60
+
61
+ def find_followers
62
+ if followers = find_by_regex(/">([^"]+)<\/span> followers</)
63
+ followers.tr(',', '').to_i
64
+ end
65
+ rescue => e
66
+ record_error __method__, e.message
67
+ return nil
68
+ end
69
+
70
+ def find_views
71
+ if views = find_by_regex(/">([^"]+)<\/span> views</)
72
+ views.tr(',', '').to_i
73
+ end
74
+ rescue => e
75
+ record_error __method__, e.message
76
+ return nil
77
+ end
78
+
23
79
  end
24
80
  end
25
81
  end
@@ -5,25 +5,96 @@ class IdsPlease
5
5
  class Instagram < IdsPlease::Grabbers::Base
6
6
 
7
7
  def grab_link
8
- @network_id = page_source.scan(/"user":{.+"id":"(\d+)"/).flatten.first
9
- @avatar = page_source.scan(/"user":{.+"profile_pic_url":"([^"]+)"/).flatten.first.gsub('\\', '')
10
- @display_name = page_source.scan(/"user":{.+"full_name":"([^"]+)"/).flatten.first
11
- @username = page_source.scan(/"user":{"username":"([^"]+)"/).flatten.first.gsub('\\', '')
12
- @data = {
13
- bio: page_source.scan(/"biography":"([^"]+)"/).flatten.first,
14
- website: page_source.scan(/"user":{.+"external_url":"([^"]+)"/).flatten.first.gsub('\\', ''),
15
- }
8
+ @network_id = find_network_id
9
+ @avatar = find_avatar
10
+ @display_name = find_display_name
11
+ @username = find_username
12
+
16
13
  @counts = {
17
- media: page_source.scan(/"media":{"count":(\d+)/).flatten.first.to_i,
18
- followed_by: page_source.scan(/"followed_by":{"count":(\d+)/).flatten.first.to_i,
19
- follows: page_source.scan(/"follows":{"count":(\d+)/).flatten.first.to_i,
20
- }
21
- @display_name = @display_name.gsub(/\\u([\da-fA-F]{4})/) {|m| [$1].pack("H*").unpack("n*").pack("U*")}
14
+ media: find_media,
15
+ followed_by: find_followed_by,
16
+ follows: find_follows
17
+ }.delete_if { |_k, v| v.nil? }
18
+
19
+ @data = {
20
+ bio: find_bio,
21
+ website: find_website
22
+ }.delete_if { |_k, v| v.nil? }
23
+
22
24
  self
23
25
  rescue => e
24
- p e
26
+ record_error __method__, e.message
25
27
  return self
26
28
  end
29
+
30
+ private
31
+
32
+ def find_network_id
33
+ find_by_regex(/"user":{.+"id":"(\d+)"/)
34
+ rescue => e
35
+ record_error __method__, e.message
36
+ return nil
37
+ end
38
+
39
+ def find_avatar
40
+ find_by_regex(/"user":{.+"profile_pic_url":"([^"]+)"/).gsub('\\', '')
41
+ rescue => e
42
+ record_error __method__, e.message
43
+ return nil
44
+ end
45
+
46
+ def find_display_name
47
+ _display_name = find_by_regex(/"user":{.+"full_name":"([^"]+)"/)
48
+ _display_name.gsub(/\\u([\da-fA-F]{4})/) { |_m|
49
+ [Regexp.last_match(1)].pack('H*').unpack('n*').pack('U*')
50
+ }
51
+ rescue => e
52
+ record_error __method__, e.message
53
+ return nil
54
+ end
55
+
56
+ def find_username
57
+ find_by_regex(/"user":{"username":"([^"]+)"/).gsub('\\', '')
58
+ rescue => e
59
+ record_error __method__, e.message
60
+ return nil
61
+ end
62
+
63
+ def find_bio
64
+ CGI.unescapeHTML(find_by_regex(/"biography":"([^"]+)"/)).strip
65
+ rescue => e
66
+ record_error __method__, e.message
67
+ return nil
68
+ end
69
+
70
+ def find_website
71
+ CGI.unescapeHTML(find_by_regex(/"user":{.+"external_url":"([^"]+)"/).gsub('\\', '')).strip
72
+ rescue => e
73
+ record_error __method__, e.message
74
+ return nil
75
+ end
76
+
77
+ def find_media
78
+ find_by_regex(/"media":{"count":(\d+)/).to_i
79
+ rescue => e
80
+ record_error __method__, e.message
81
+ return nil
82
+ end
83
+
84
+ def find_followed_by
85
+ find_by_regex(/"followed_by":{"count":(\d+)/).to_i
86
+ rescue => e
87
+ record_error __method__, e.message
88
+ return nil
89
+ end
90
+
91
+ def find_follows
92
+ find_by_regex(/"follows":{"count":(\d+)/).to_i
93
+ rescue => e
94
+ record_error __method__, e.message
95
+ return nil
96
+ end
97
+
27
98
  end
28
99
  end
29
100
  end
@@ -9,17 +9,17 @@ class IdsPlease
9
9
  uid_url = "http://appsmail.ru/platform/#{link.split('/')[-2..-1].join('/')}"
10
10
  @network_id = JSON.parse(open(uid_url).read)['uid']
11
11
  @username, type = get_name_and_type(link)
12
- @avatar = page_source.scan(/profile__avatar" src="([^"]+)/).flatten.first
13
- @display_name = page_source.scan(/h1.+title="([^"]+)/).flatten.first
12
+ @avatar = find_by_regex(/profile__avatar" src="([^"]+)/)
13
+ @display_name = find_by_regex(/h1.+title="([^"]+)/)
14
14
  @display_name = CGI.unescapeHTML(@display_name) if @display_name
15
15
  @data = {
16
16
  type: type,
17
- description: page_source.scan(/profile__content_mainInfo" title="([^"]+)/).flatten.first
17
+ description: find_by_regex(/profile__content_mainInfo" title="([^"]+)/)
18
18
  }
19
19
  @data[:description] = CGI.unescapeHTML(@data[:description]) if @data[:description]
20
20
  self
21
21
  rescue => e
22
- p e
22
+ record_error __method__, e.message
23
23
  return self
24
24
  end
25
25
 
@@ -3,32 +3,118 @@ class IdsPlease
3
3
  class Twitter < IdsPlease::Grabbers::Base
4
4
 
5
5
  def grab_link
6
- @network_id = page_source.scan(/data-user-id="(\d+)"/).flatten.first
7
- @avatar = page_source.scan(/ProfileAvatar-image " src="([^"]+)"/).flatten.first
8
- @display_name = page_source.scan(/ProfileHeaderCard-nameLink[^>]+>([^<]+)</).flatten.first
9
- @username = page_source.scan(/<title>[^\(]+\(@([^\)]+)\)/).flatten.first
10
- @data = {}
11
- {
12
- description: page_source.scan(/ProfileHeaderCard-bio[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
13
- location: page_source.scan(/ProfileHeaderCard-locationText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
14
- join_date: page_source.scan(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</).flatten.first.to_s.encode('utf-8'),
15
- }.each do |k, v|
16
- next if v.nil? || v == ''
17
- @data[k] = CGI.unescapeHTML(v).strip
18
- end
6
+ @network_id = find_network_id
7
+ @avatar = find_avatar
8
+ @display_name = find_display_name
9
+ @username = find_username
10
+
19
11
  @counts = {
20
- tweets: page_source.scan(/statuses_count&quot;:(\d+),&quot;/).flatten.first.to_i,
21
- following: page_source.scan(/friends_count&quot;:(\d+),&quot;/).flatten.first.to_i,
22
- followers: page_source.scan(/followers_count&quot;:(\d+),&quot;/).flatten.first.to_i,
23
- favorites: page_source.scan(/favourites_count&quot;:(\d+),&quot;/).flatten.first.to_i,
24
- lists: page_source.scan(/listed_count&quot;:(\d+),&quot;/).flatten.first.to_i,
25
- }
12
+ tweets: find_tweets,
13
+ following: find_following,
14
+ followers: find_followers,
15
+ favorites: find_favorites,
16
+ lists: find_listed
17
+ }.delete_if { |_k, v| v.nil? }
18
+
19
+ @data = {
20
+ description: find_description,
21
+ location: find_location,
22
+ join_date: find_join_date
23
+ }.delete_if { |_k, v| v.nil? }
24
+
26
25
  self
27
26
  rescue => e
28
- p e
27
+ record_error __method__, e.message
29
28
  return self
30
29
  end
31
30
 
31
+ def find_network_id
32
+ find_by_regex(/data-user-id="(\d+)"/)
33
+ rescue => e
34
+ record_error __method__, e.message
35
+ return nil
36
+ end
37
+
38
+ def find_avatar
39
+ find_by_regex(/ProfileAvatar-image " src="([^"]+)"/)
40
+ rescue => e
41
+ record_error __method__, e.message
42
+ return nil
43
+ end
44
+
45
+ def find_display_name
46
+ find_by_regex(/ProfileHeaderCard-nameLink[^>]+>([^<]+)</)
47
+ rescue => e
48
+ record_error __method__, e.message
49
+ return nil
50
+ end
51
+
52
+ def find_username
53
+ find_by_regex(/<title>[^\(]+\(@([^\)]+)\)/)
54
+ rescue => e
55
+ record_error __method__, e.message
56
+ return nil
57
+ end
58
+
59
+ def find_description
60
+ _desc = find_by_regex(/ProfileHeaderCard-bio[^>]+>([^<]+)</)
61
+ CGI.unescapeHTML(_desc.encode('utf-8')).strip
62
+ rescue => e
63
+ record_error __method__, e.message
64
+ return nil
65
+ end
66
+
67
+ def find_location
68
+ _loc = find_by_regex(/ProfileHeaderCard-locationText[^>]+>([^<]+)</)
69
+ CGI.unescapeHTML(_loc.encode('utf-8')).strip
70
+ rescue => e
71
+ record_error __method__, e.message
72
+ return nil
73
+ end
74
+
75
+ def find_join_date
76
+ _date = find_by_regex(/ProfileHeaderCard-joinDateText[^>]+>([^<]+)</)
77
+ CGI.unescapeHTML(_date.encode('utf-8')).strip
78
+ rescue => e
79
+ record_error __method__, e.message
80
+ return nil
81
+ end
82
+
83
+ def find_tweets
84
+ find_by_regex(/statuses_count&quot;:(\d+),&quot;/).to_i
85
+ rescue => e
86
+ record_error __method__, e.message
87
+ return nil
88
+ end
89
+
90
+ def find_followers
91
+ find_by_regex(/followers_count&quot;:(\d+),&quot;/).to_i
92
+ rescue => e
93
+ record_error __method__, e.message
94
+ return nil
95
+ end
96
+
97
+ def find_following
98
+ find_by_regex(/friends_count&quot;:(\d+),&quot;/).to_i
99
+ rescue => e
100
+ record_error __method__, e.message
101
+ return nil
102
+ end
103
+
104
+ def find_favorites
105
+ find_by_regex(/favourites_count&quot;:(\d+),&quot;/).to_i
106
+ rescue => e
107
+ record_error __method__, e.message
108
+ return nil
109
+ end
110
+
111
+ def find_listed
112
+ find_by_regex(/listed_count&quot;:(\d+),&quot;/).to_i
113
+ rescue => e
114
+ record_error __method__, e.message
115
+ return nil
116
+ end
117
+
32
118
  end
33
119
  end
34
120
  end
@@ -1,26 +1,24 @@
1
1
  class IdsPlease
2
2
  module Grabbers
3
3
  class Vkontakte < IdsPlease::Grabbers::Base
4
-
5
4
  def grab_link
6
5
  agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36'
7
6
  @page_source ||= open(link, 'User-Agent' => agent).read.encode('utf-8')
8
- @network_id = page_source.scan(/href="\/wall(-\d+)_/).flatten.first
7
+ @network_id = find_by_regex(/href="\/wall(-\d+)_/)
9
8
  @username = @link.to_s.split('vk.com/').last.gsub('/', '')
10
- @avatar = page_source.scan(/page_avatar.+\n.+src="([^"]+)/).flatten.first
9
+ @avatar = find_by_regex(/page_avatar.+\n.+src="([^"]+)/)
11
10
  @avatar = CGI.unescapeHTML(@avatar) if @avatar
12
- @display_name = page_source.scan(/page_name">([^<]+)/).flatten.first
11
+ @display_name = find_by_regex(/page_name">([^<]+)/)
13
12
  @display_name = CGI.unescapeHTML(@display_name) if @display_name
14
13
  @data = {
15
- description: page_source.scan(/description" content="([^"]+)/).flatten.first
14
+ description: find_by_regex(/description" content="([^"]+)/)
16
15
  }
17
16
  @data[:description] = CGI.unescapeHTML(@data[:description]) if @data[:description]
18
17
  self
19
18
  rescue => e
20
- p e
19
+ record_error __method__, e.message
21
20
  return self
22
21
  end
23
-
24
22
  end
25
23
  end
26
24
  end
@@ -8,14 +8,14 @@ require_relative 'grabbers/google_plus'
8
8
 
9
9
  class IdsPlease
10
10
  module Grabbers
11
-
11
+
12
12
  NETWORKS = {
13
13
  facebook: IdsPlease::Grabbers::Facebook,
14
14
  vkontakte: IdsPlease::Grabbers::Vkontakte,
15
15
  twitter: IdsPlease::Grabbers::Twitter,
16
16
  instagram: IdsPlease::Grabbers::Instagram,
17
17
  mailru: IdsPlease::Grabbers::Mailru,
18
- google_plus: IdsPlease::Grabbers::GooglePlus,
18
+ google_plus: IdsPlease::Grabbers::GooglePlus
19
19
  }
20
20
 
21
21
  def self.each
@@ -25,6 +25,6 @@ class IdsPlease
25
25
  def self.by_symbol(sym)
26
26
  NETWORKS[sym]
27
27
  end
28
-
28
+
29
29
  end
30
30
  end
@@ -1,10 +1,10 @@
1
1
  class IdsPlease
2
2
  module Parsers
3
3
  class Base
4
-
5
4
  class << self
5
+
6
6
  def to_sym
7
- self.name.split('::').last.downcase.to_sym
7
+ name.split('::').last.downcase.to_sym
8
8
  end
9
9
 
10
10
  def interact(links)
@@ -15,8 +15,6 @@ class IdsPlease
15
15
  end.compact
16
16
  end
17
17
 
18
- private
19
-
20
18
  def parse_link(link)
21
19
  link.path.split('/')[1]
22
20
  end
@@ -24,8 +22,8 @@ class IdsPlease
24
22
  def valid_id_regex
25
23
  /\A([\w\.\+-]{2,})/
26
24
  end
27
- end
28
25
 
26
+ end
29
27
  end
30
28
  end
31
29
  end
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /fb\.me|fb\.com|facebook/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  query = CGI.parse(link.query) if link.query && !link.query.empty?
12
10
 
@@ -13,8 +13,6 @@ class IdsPlease
13
13
  links.map { |l| parse_link(l) }.compact
14
14
  end
15
15
 
16
- private
17
-
18
16
  def parse_link(link)
19
17
  if matched = link.path.match(/\/(\+\w+)/)
20
18
  matched[1]
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /hi5/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  query = CGI.parse(link.query) if link.query && !link.query.empty?
12
10
 
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /linkedin/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  query = CGI.parse(link.query) if link.query && !link.query.empty?
12
10
 
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /livejournal/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  parsed = link.host.sub('.livejournal.com', '')
12
10
  parsed = link.host.split('.livejournal').first if parsed == link.host
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /mail\.ru/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  id = link.path.split('/')[2]
12
10
  id.split('?').first.split('#').first
@@ -12,8 +12,6 @@ class IdsPlease
12
12
  end.compact
13
13
  end
14
14
 
15
- private
16
-
17
15
  def parse_link(link)
18
16
  link.host.sub('.moikrug.ru', '')
19
17
  end
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /odnoklassniki|ok\.ru/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  if matched = link.path.match(/\/(\d{2,})/)
12
10
  matched[1]
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /reddit/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  link.path.split('/')[2]
12
10
  end
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /tumblr/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  return if link.host.sub('.tumblr.com', '') == link.host
12
10
  link.host.sub('.tumblr.com', '')
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /twitter/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  if link.path =~ /%23!/
12
10
  id = link.path.sub(/\A\/%23!\//, '')
@@ -9,8 +9,6 @@ class IdsPlease
9
9
  links.map { |l| parse_link(l) }.compact
10
10
  end
11
11
 
12
- private
13
-
14
12
  def parse_link(link)
15
13
  if link.path =~ /id|club|public/
16
14
  id = link.path.sub(/\A\/id|\A\/club|\A\/public/, '')
@@ -5,8 +5,6 @@ class IdsPlease
5
5
  MASK = /youtu\.be|youtube/i
6
6
 
7
7
  class << self
8
- private
9
-
10
8
  def parse_link(link)
11
9
  if link.path =~ /channels|user/
12
10
  link.path.split('/')[2]
@@ -44,13 +44,13 @@ class IdsPlease
44
44
  mailru: IdsPlease::Parsers::Mailru
45
45
  }
46
46
 
47
- def self.each
48
- NETWORKS.values.each { |n| yield n }
49
- end
50
-
51
47
  def self.by_symbol(sym)
52
48
  NETWORKS[sym]
53
49
  end
54
50
 
51
+ def self.to_a
52
+ NETWORKS.values
53
+ end
54
+
55
55
  end
56
56
  end
@@ -1,3 +1,5 @@
1
1
  class IdsPlease
2
- VERSION = '2.2.0'
2
+
3
+ VERSION = '2.2.1'
4
+
3
5
  end
data/lib/ids_please.rb CHANGED
@@ -26,7 +26,7 @@ class IdsPlease
26
26
  end
27
27
 
28
28
  def recognized
29
- Hash[@recognized.map { |parser, links| [ parser.to_sym, links ] }]
29
+ Hash[@recognized.map { |parser, links| [parser.to_sym, links] }]
30
30
  end
31
31
 
32
32
  def parse
@@ -39,22 +39,22 @@ class IdsPlease
39
39
 
40
40
  private
41
41
 
42
- def interact(interactors = :parsers)
42
+ def interact(interactors_group = :parsers)
43
43
  recognize
44
44
  interacted = Hash.new { |hash, network| hash[network] = [] }
45
45
  @recognized.each do |network, links|
46
- interactor = IdsPlease.send(interactors).by_symbol(network)
46
+ interactor = IdsPlease.send(interactors_group).by_symbol(network)
47
47
  interacted[network].concat interactor.interact(links)
48
48
  end
49
- self.instance_variable_set(interacted_var(interactors), interacted)
49
+ instance_variable_set(interacted_var(interactors_group), interacted)
50
50
 
51
51
  interacted
52
52
  end
53
53
 
54
- def interacted_var(interactors)
55
- if interactors == :parsers
54
+ def interacted_var(interactors_group)
55
+ if interactors_group == :parsers
56
56
  :@parsed
57
- elsif interactors == :grabbers
57
+ elsif interactors_group == :grabbers
58
58
  :@grabbed
59
59
  else
60
60
  throw 'Wrong interactors type'
@@ -64,14 +64,14 @@ class IdsPlease
64
64
  def recognize_link(link)
65
65
  link = "http://#{link}" unless link =~ /\Ahttps?:\/\//
66
66
  parsed_link = URI(URI.encode(link))
67
- IdsPlease::Parsers.each do |network|
68
- if parsed_link.host =~ network::MASK
69
- @recognized[network.to_sym] ||= []
70
- @recognized[network.to_sym] << parsed_link
71
- return
72
- end
67
+
68
+ network = IdsPlease::Parsers.to_a.find { |n| parsed_link.host =~ n::MASK }
69
+
70
+ if network
71
+ @recognized[network.to_sym] ||= []
72
+ @recognized[network.to_sym] << parsed_link
73
+ else
74
+ @unrecognized << link
73
75
  end
74
- unrecognized << link
75
76
  end
76
-
77
77
  end
@@ -1,51 +1,49 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe IdsPlease do
4
-
5
4
  recognazible_links = %w(
6
- https://www.facebook.com/fb_acc
7
- https://facebook.com/fb_acc2<U+200>
8
- http://instagram.com/inst_acc
9
- http://hi5.com/hi5_acc
10
- http://www.hi5.com/profile.html?uid=12341234
11
- http://pinterest.com/pinterest_acc
12
- http://blogger-acc.blogspot.com
13
- http://livejournal-acc.livejournal.com
14
- http://livejournal-acc2.livejournal.ru
15
- https://www.blogger.com/blogger.g?blogID=12341234#overview/src=dashboard
16
- http://vk.com/vk_acc
17
- http://linkedin.com/in/xnutsive
18
- http://www.linkedin.com/company/evil-martians
19
- http://www.linkedin.com/profile/view?id=12341234&trk=nav_responsive_tab_profile
20
- http://Ameblo.jp/ameba_acc
21
- http://reddit.com/user/reddit_acc
22
- https://twitter.com/twi_acc
23
- https://vimeo.com/vimeo_acc
24
- https://plus.google.com/12341234
25
- https://plus.google.com/+VladimirBokov
26
- https://soundcloud.com/sc_acc
27
- https://youtube.com/channels/yb_acc
28
- http://tumblr-acc.tumblr.com
29
- http://odnoklassniki.com/profile/12341234/about
30
- http://ok.ru/profile/12341234/about
31
- http://odnoklassniki.com/group/43214321/about?some=123
32
- http://moikrug-acc.moikrug.ru
33
- https://my.mail.ru/community/test-group-102/
34
- https://my.mail.ru/mail/gazay/
35
- )
5
+ https://www.facebook.com/fb_acc
6
+ https://facebook.com/fb_acc2<U+200>
7
+ http://instagram.com/inst_acc
8
+ http://hi5.com/hi5_acc
9
+ http://www.hi5.com/profile.html?uid=12341234
10
+ http://pinterest.com/pinterest_acc
11
+ http://blogger-acc.blogspot.com
12
+ http://livejournal-acc.livejournal.com
13
+ http://livejournal-acc2.livejournal.ru
14
+ https://www.blogger.com/blogger.g?blogID=12341234#overview/src=dashboard
15
+ http://vk.com/vk_acc
16
+ http://linkedin.com/in/xnutsive
17
+ http://www.linkedin.com/company/evil-martians
18
+ http://www.linkedin.com/profile/view?id=12341234&trk=nav_responsive_tab_profile
19
+ http://Ameblo.jp/ameba_acc
20
+ http://reddit.com/user/reddit_acc
21
+ https://twitter.com/twi_acc
22
+ https://vimeo.com/vimeo_acc
23
+ https://plus.google.com/12341234
24
+ https://plus.google.com/+VladimirBokov
25
+ https://soundcloud.com/sc_acc
26
+ https://youtube.com/channels/yb_acc
27
+ http://tumblr-acc.tumblr.com
28
+ http://odnoklassniki.com/profile/12341234/about
29
+ http://ok.ru/profile/12341234/about
30
+ http://odnoklassniki.com/group/43214321/about?some=123
31
+ http://moikrug-acc.moikrug.ru
32
+ https://my.mail.ru/community/test-group-102/
33
+ https://my.mail.ru/mail/gazay/
34
+ )
36
35
 
37
36
  not_recognazible_links = %w(
38
- http://fucebook.com/not_recognized
39
- http://vka.com/not_recognized
40
- )
37
+ http://fucebook.com/not_recognized
38
+ http://vka.com/not_recognized
39
+ )
41
40
 
42
41
  not_parseble_links = %w(
43
- http://vk.com
44
- http://soundcloud.com
45
- )
42
+ http://vk.com
43
+ http://soundcloud.com
44
+ )
46
45
 
47
46
  describe 'recognize' do
48
-
49
47
  it 'not recognizes wrong links' do
50
48
  recognizer = IdsPlease.new(*not_recognazible_links)
51
49
  recognizer.recognize
@@ -138,13 +136,10 @@ describe IdsPlease do
138
136
  it 'recognizes mailru links' do
139
137
  expect(@recognizer.recognized[:mailru].count).to eq(2)
140
138
  end
141
-
142
139
  end
143
-
144
140
  end
145
141
 
146
142
  describe 'parse' do
147
-
148
143
  it 'not parse wrong links' do
149
144
  @recognizer = IdsPlease.new(*not_parseble_links)
150
145
  @recognizer.parse
@@ -170,7 +165,7 @@ describe IdsPlease do
170
165
  end
171
166
 
172
167
  it 'get right id from facebook link' do
173
- expect(@recognizer.parsed[:facebook]).to eq(['fb_acc', 'fb_acc2'])
168
+ expect(@recognizer.parsed[:facebook]).to eq(%w(fb_acc fb_acc2))
174
169
  end
175
170
 
176
171
  it 'get right id from linkedin link' do
@@ -210,7 +205,7 @@ describe IdsPlease do
210
205
  end
211
206
 
212
207
  it 'get right id from hi5 link' do
213
- expect(@recognizer.parsed[:hi5]).to eq(['hi5_acc', '12341234'])
208
+ expect(@recognizer.parsed[:hi5]).to eq(%w(hi5_acc 12341234))
214
209
  end
215
210
 
216
211
  it 'get right id from soundcloud link' do
@@ -234,14 +229,12 @@ describe IdsPlease do
234
229
  end
235
230
 
236
231
  it 'get right id from odnoklassniki link' do
237
- expect(@recognizer.parsed[:odnoklassniki].sort).to eq(['12341234', '43214321', '12341234'].sort)
232
+ expect(@recognizer.parsed[:odnoklassniki].sort).to eq(%w(12341234 43214321 12341234).sort)
238
233
  end
239
234
 
240
235
  it 'get right id from moikrug link' do
241
236
  expect(@recognizer.parsed[:moikrug].first).to eq('moikrug-acc')
242
237
  end
243
-
244
238
  end
245
239
  end
246
-
247
240
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ids_please
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - gazay
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-17 00:00:00.000000000 Z
11
+ date: 2016-04-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -40,7 +40,8 @@ dependencies:
40
40
  version: '0'
41
41
  description: Helps to get ids or screen names from links to social network accounts
42
42
  email: alex.gaziev@gmail.com
43
- executables: []
43
+ executables:
44
+ - ids_please
44
45
  extensions: []
45
46
  extra_rdoc_files:
46
47
  - LICENSE
@@ -51,8 +52,10 @@ files:
51
52
  - LICENSE
52
53
  - README.md
53
54
  - Rakefile
55
+ - bin/ids_please
54
56
  - ids_please.gemspec
55
57
  - lib/ids_please.rb
58
+ - lib/ids_please/cli.rb
56
59
  - lib/ids_please/grabbers.rb
57
60
  - lib/ids_please/grabbers/base.rb
58
61
  - lib/ids_please/grabbers/facebook.rb
@@ -106,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
106
109
  version: '0'
107
110
  requirements: []
108
111
  rubyforge_project:
109
- rubygems_version: 2.4.5
112
+ rubygems_version: 2.4.5.1
110
113
  signing_key:
111
114
  specification_version: 4
112
115
  summary: Helps to get ids or screen names from links to social network accounts