network_profile 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: eaa6afbf057402529a5e99c22ff61b98b1af6bcb8ca03cd869ddf03ba0e6893f
4
+ data.tar.gz: 1cd9b2fe99baba7cbc8f6774d8cd981bffa7e6860349df584d7041e9bb23aeb9
5
+ SHA512:
6
+ metadata.gz: 93002910a6cb60f7ca0434b2231a583e440e2d4bc9a4e23e3721d021ee6af70acf959f0b012932f65dc175e5e209edba490add5780137d9e6afb9af3ebe6fa7b
7
+ data.tar.gz: 649cea60a5a341322cb3927aac5cd35b59bf374632694d9443c6dc37cfacf00a4afefe42336e350dce742d06c3ee81a3512eaf5407a82e31f10af77c5e86d97d
@@ -0,0 +1,23 @@
1
+ name: Verify
2
+ on: [push]
3
+
4
+ jobs:
5
+ tests:
6
+ name: Tests
7
+ runs-on: ubuntu-latest
8
+ strategy:
9
+ matrix:
10
+ # ruby: [ '2.5', '2.6', '2.7' ]
11
+ ruby: [ '2.6' ]
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - uses: ruby/setup-ruby@v1
15
+ with:
16
+ ruby-version: ${{ matrix.ruby }}
17
+ - name: Install gems
18
+ run: |
19
+ bundle config path vendor/bundle
20
+ bundle install --jobs 4 --retry 3
21
+ - name: Run tests
22
+ run: bin/rspec
23
+
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ spec/.failures.txt
10
+
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in network_profile.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "pry"
8
+ gem "rspec", ">= 3.5"
9
+ gem "vcr"
10
+ gem "webmock"
@@ -0,0 +1,92 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ network_profile (0.1.0)
5
+ activesupport (>= 5.0.0)
6
+ nokogiri
7
+ rdf-microdata
8
+ typhoeus
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ activesupport (6.0.3.3)
14
+ concurrent-ruby (~> 1.0, >= 1.0.2)
15
+ i18n (>= 0.7, < 2)
16
+ minitest (~> 5.1)
17
+ tzinfo (~> 1.1)
18
+ zeitwerk (~> 2.2, >= 2.2.2)
19
+ addressable (2.7.0)
20
+ public_suffix (>= 2.0.2, < 5.0)
21
+ coderay (1.1.3)
22
+ concurrent-ruby (1.1.7)
23
+ crack (0.4.4)
24
+ diff-lcs (1.4.4)
25
+ ethon (0.12.0)
26
+ ffi (>= 1.3.0)
27
+ ffi (1.13.1)
28
+ hamster (3.0.0)
29
+ concurrent-ruby (~> 1.0)
30
+ hashdiff (1.0.1)
31
+ htmlentities (4.3.4)
32
+ i18n (1.8.5)
33
+ concurrent-ruby (~> 1.0)
34
+ link_header (0.0.8)
35
+ method_source (1.0.0)
36
+ mini_portile2 (2.4.0)
37
+ minitest (5.14.2)
38
+ nokogiri (1.10.10)
39
+ mini_portile2 (~> 2.4.0)
40
+ pry (0.13.1)
41
+ coderay (~> 1.1)
42
+ method_source (~> 1.0)
43
+ public_suffix (4.0.6)
44
+ rake (12.3.3)
45
+ rdf (3.1.6)
46
+ hamster (~> 3.0)
47
+ link_header (~> 0.0, >= 0.0.8)
48
+ rdf-microdata (3.1.1)
49
+ htmlentities (~> 4.3)
50
+ nokogiri (~> 1.10)
51
+ rdf (~> 3.1)
52
+ rdf-xsd (~> 3.1)
53
+ rdf-xsd (3.1.0)
54
+ rdf (~> 3.1)
55
+ rspec (3.9.0)
56
+ rspec-core (~> 3.9.0)
57
+ rspec-expectations (~> 3.9.0)
58
+ rspec-mocks (~> 3.9.0)
59
+ rspec-core (3.9.2)
60
+ rspec-support (~> 3.9.3)
61
+ rspec-expectations (3.9.2)
62
+ diff-lcs (>= 1.2.0, < 2.0)
63
+ rspec-support (~> 3.9.0)
64
+ rspec-mocks (3.9.1)
65
+ diff-lcs (>= 1.2.0, < 2.0)
66
+ rspec-support (~> 3.9.0)
67
+ rspec-support (3.9.3)
68
+ thread_safe (0.3.6)
69
+ typhoeus (1.4.0)
70
+ ethon (>= 0.9.0)
71
+ tzinfo (1.2.7)
72
+ thread_safe (~> 0.1)
73
+ vcr (6.0.0)
74
+ webmock (3.9.1)
75
+ addressable (>= 2.3.6)
76
+ crack (>= 0.3.2)
77
+ hashdiff (>= 0.4.0, < 2.0.0)
78
+ zeitwerk (2.4.0)
79
+
80
+ PLATFORMS
81
+ ruby
82
+
83
+ DEPENDENCIES
84
+ network_profile!
85
+ pry
86
+ rake (~> 12.0)
87
+ rspec (>= 3.5)
88
+ vcr
89
+ webmock
90
+
91
+ BUNDLED WITH
92
+ 2.1.4
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Stefan Wienert
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,75 @@
1
+ # NetworkProfile
2
+
3
+ Extractor gem that scans arbitrary strings for links to a user's profiles. For example: a user uploads a PDF, and the gem scans its text for all references to social network profiles.
4
+
5
+ This work is extracted from the German Applicant Tracking System EBMS (https://bms.empfehlungsbund.de).
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'network_profile'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle install
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install network_profile
22
+
23
+ ## Usage
24
+
25
+ ### Parse and extract one link
26
+
27
+
28
+ extraction = NetworkProfile.parse('https://github.com/zealot128', include_fallback_custom: true)
29
+
30
+ - ``include_fallback_custom: true`` uses the default extractor (og/meta-tags) if no other more specific extractor is found
31
+ - ``include_fallback_custom: false`` only uses the specific website extractors and returns nil if none matches the link
32
+
33
+ ### Scan a whole long string for links
34
+
35
+ links = NetworkProfile::Extractor.call("Very long String with even broken links in it www . github . com/zealot128")
36
+
37
+ ### Config
38
+
39
+ NetworkProfile.headers = {
40
+ 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
41
+ 'Accept-Language' => 'de,en-US;q=0.7,en;q=0.3',
42
+ 'Referer' => 'https://www.google.com',
43
+ 'DNT' => '1',
44
+ 'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:73.0) Gecko/20100101 Firefox/73.0',
45
+ }
46
+ NetworkProfile.github_api_key = nil
47
+
48
+
49
+ ## Extractor
50
+
51
+ The following network profiles are supported:
52
+
53
+ **GithubProfile/Company, GithubProject**:
54
+
55
+ - uses GitHub's GraphQL API (thus an API key is required, see `NetworkProfile.github_api_key`)
56
+
57
+ **Instagram** **Facebook** **Linkedin**
58
+
59
+ - Because those websites are closed and defensive as hell, there is no extraction, just a simple matching (e.g. "Facebook profile")
60
+
61
+ **Stackoverflow**
62
+
63
+ - Uses SO's API
64
+
65
+ **Upwork** **XING** **ResearchGate**
66
+
67
+ - Custom Website Scraper/Extract JSON+LD
68
+
69
+ **Default Fallback** (Custom)
70
+
71
+ - OG-Meta-Tags / HTML-Meta-Tags
72
+
73
+ ## License
74
+
75
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "network_profile"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rake' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rake", "rake")
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rspec' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rspec-core", "rspec")
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,23 @@
1
+ require "network_profile/version"
2
+ require 'network_profile/extractor'
3
+ require 'network_profile/extractors/default_profile'
4
+ require 'active_support/core_ext/module/attribute_accessors'
5
+
6
# Top-level namespace of the gem. Holds the module-wide configuration
# (HTTP request headers and the GitHub API key) and the `parse` entry point.
module NetworkProfile
  # Base error class of the gem.
  Error = Class.new(StandardError)

  # Module-level accessors: NetworkProfile.headers / NetworkProfile.github_api_key
  mattr_accessor :headers
  mattr_accessor :github_api_key

  # Default request headers; DefaultProfile passes them to Typhoeus when
  # fetching a profile page.
  self.headers = {
    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language' => 'de,en-US;q=0.7,en;q=0.3',
    'Referer' => 'https://www.google.com',
    'DNT' => '1',
    'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:73.0) Gecko/20100101 Firefox/73.0',
  }

  # No key by default; the GitHub extractors only handle links once a key is set.
  self.github_api_key = nil

  class << self
    # Delegates to DefaultProfile.parse.
    #
    # @param link [String] URL of the profile
    # @param include_fallback_custom [Boolean] also try the generic Custom extractor
    # @return [Hash, nil] extracted data, or nil when no extractor handles the link
    def parse(link, include_fallback_custom: false)
      NetworkProfile::DefaultProfile.parse(link, include_fallback_custom: include_fallback_custom)
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'active_support/core_ext/string/filters'
2
# Scans an arbitrary string for URLs (including "broken" ones such as
# "www . github . com/foo") and runs every found link through
# NetworkProfile.parse.
class NetworkProfile::Extractor
  # Logic from:
  # https://github.com/tenderlove/rails_autolink/blob/master/lib/rails_autolink/helpers.rb
  AUTO_LINK_RE = %r{
      (?: ((?:ed2k|ftp|http|https|irc|mailto|news|gopher|nntp|telnet|webcal|xmpp|callto|feed|svn|urn|aim|rsync|tag|ssh|sftp|rtsp|afs|file):)// | www\. )
      [^\s<\u00A0"]+
    }ix.freeze
  WORD_PATTERN = '\p{Word}'.freeze
  # closing bracket => matching opening bracket
  BRACKETS = { ']' => '[', ')' => '(', '}' => '{' }.freeze

  TLD = /(?<tld>com|de|net|fr|at|ch|info)/.freeze
  HOST_PART = %r{(?<host>[a-z\-\.0-9]+)}.freeze

  # @param string [String] text to scan
  # @return [Array<Hash>] data of every link that could be parsed
  def self.call(string)
    new(string).extracted_links!
  end

  # @param string [String] text to scan
  def initialize(string)
    @string = string
  end

  # Parses every extracted link. A link whose extraction raises is reported
  # on stderr and skipped, so one failing link does not abort the whole scan.
  #
  # @return [Array<Hash>] non-nil parse results
  def extracted_links!
    extracted = links.map do |l|
      NetworkProfile.parse(l)
    rescue StandardError => e
      # best effort: report on stderr instead of polluting stdout with `p`
      warn "NetworkProfile::Extractor: skipping #{l.inspect} (#{e.class}: #{e.message})"
      nil
    end
    extracted.compact
  end

  # All distinct links found in the string. The de-duplicated list itself is
  # memoised, so repeated calls return the same result as the first call.
  #
  # @return [Array<String>]
  def links
    @links ||= scan_links.uniq
  end

  # Normalises the input before scanning: bare "host.tld/" occurrences get an
  # "https://" scheme, and "www . host . tld/path" written with spaces is
  # collapsed into a regular "www.host.tld/path".
  #
  # @return [String]
  def mapped_string
    @string
      .gsub(%r{ (#{HOST_PART}\.#{TLD}/)}) { |_|
        host = Regexp.last_match['host']
        "https://#{host}.#{Regexp.last_match['tld']}/"
      }
      .gsub(%r{ www *\. +#{HOST_PART} *\. *#{TLD}(?<path>[^<\u00A0"]+)}) { |_|
        path = Regexp.last_match['path'].remove(' ')
        "www.#{Regexp.last_match['host']}.#{Regexp.last_match['tld']}#{path}"
      }
  end

  private

  # Collects every URL match (duplicates included), stripping trailing
  # punctuation while keeping a closing bracket that balances an opening one
  # inside the URL.
  def scan_links
    found = []
    mapped_string.scan(AUTO_LINK_RE) do |_|
      scheme = Regexp.last_match(1) # must be read before sub! overwrites the match data
      href = $&
      punctuation = []
      while href.sub!(%r{[^#{WORD_PATTERN}/-=&]$}, '')
        punctuation.push($&)
        opening = BRACKETS[punctuation.last]
        if opening && href.scan(opening).size > href.scan(punctuation.last).size
          href << punctuation.pop
          break
        end
      end
      href = 'https://' + href unless scheme
      found << href
    end
    found
  end
end
@@ -0,0 +1,9 @@
1
module NetworkProfile
  # Generic fallback profile: relies entirely on the extraction implemented
  # in DefaultProfile and accepts any link.
  class Custom < DefaultProfile
    self.mdi_icon = 'open-in-new'

    class << self
      # @param link [String] ignored
      # @return [true] every link is handled
      def handle?(link)
        true
      end
    end
  end
end
@@ -0,0 +1,129 @@
1
+ require 'rdf/microdata'
2
+ require 'active_support/descendants_tracker'
3
+ require 'active_support/core_ext/module/attribute_accessors'
4
+ require 'active_support/core_ext/string/inflections'
5
+ require 'nokogiri'
6
+ require 'typhoeus'
7
+
8
module NetworkProfile
  # Base class of all extractors. Fetches the linked page and reads
  # title/description/image from its HTML (OpenGraph and meta tags).
  # Subclasses implement `self.handle?(link)` and may override the
  # individual readers and `extra_data`.
  class DefaultProfile
    include ActiveSupport::DescendantsTracker

    class << self
      attr_accessor :headers

      # Icon name of this profile type. Stored per class (class-level
      # instance variable) so that each subclass keeps its own value; a
      # `cattr_accessor` would share one class variable across the whole
      # hierarchy and the last `self.mdi_icon = ...` would win for everyone.
      attr_writer :mdi_icon

      # @return [String, nil] own icon, or the nearest ancestor's icon when
      #   this class never set one
      def mdi_icon
        return @mdi_icon if instance_variable_defined?(:@mdi_icon)

        superclass.mdi_icon if superclass.respond_to?(:mdi_icon)
      end
    end

    # Instance-level reader, kept for compatibility with the former
    # cattr_accessor.
    def mdi_icon
      self.class.mdi_icon
    end

    # Instance-level writer, kept for compatibility with the former
    # cattr_accessor; sets the icon on the instance's class.
    def mdi_icon=(value)
      self.class.mdi_icon = value
    end

    # Site-specific extractors, in the order in which they are tried.
    def self.auto_extractor_link_types
      [
        NetworkProfile::GithubProfile,
        NetworkProfile::GithubProject,
        NetworkProfile::LinkedinProfile,
        NetworkProfile::InstagramProfile,
        NetworkProfile::XingProfile,
        NetworkProfile::ResearchgateProfile,
        NetworkProfile::UpworkProfile,
        NetworkProfile::FacebookProfile,
        NetworkProfile::StackoverflowProfile,
      ].freeze
    end

    # Site-specific extractors plus the generic Custom fallback (tried last).
    def self.all_types
      auto_extractor_link_types + [NetworkProfile::Custom]
    end

    # Picks the first extractor whose `handle?` accepts the link and returns
    # its data.
    #
    # @param link [String]
    # @param include_fallback_custom [Boolean] also consider Custom
    # @return [Hash, nil] nil when no extractor handles the link
    def self.parse(link, include_fallback_custom: false)
      link_type = (include_fallback_custom ? all_types : auto_extractor_link_types).find { |i| i.handle?(link) }
      if link_type
        link_type.new(link.strip).data
      end
    end

    # @param link [String] profile URL
    def initialize(link)
      @link = link
    end

    # og:image of the page; a path starting with "/" is resolved against the link.
    def image
      img = doc.at('meta[property=og\:image]')&.[]('content')
      if img && img[%r{^/\w+}]
        img = URI.join(@link, img).to_s
      end
      img
    end

    # Text of the page's <title> element, if any.
    def title
      doc.at('title')&.text
    end

    # og:description, falling back to the plain description meta tag.
    def text
      doc.at('meta[property=og\:description]')&.[]('content') || doc.at('meta[name=description]')&.[]('content')
    end

    # @return [Hash] common fields merged with the subclass' extra_data
    def data
      {
        site_icon: mdi_icon,
        link: @link,
        title: title,
        text: text,
        image: image,
        type: self.class.name.underscore.split('/').last
      }.merge(extra_data)
    end

    # Hook for subclasses: additional site-specific fields.
    def extra_data
      {}
    end

    private

    # HTTP response for the link (memoised); sends NetworkProfile.headers and
    # follows redirects.
    def response
      @response ||= Typhoeus.get(@link, headers: NetworkProfile.headers, followlocation: true)
    end

    # Parsed document of the response body (memoised).
    def doc
      @doc ||= Nokogiri.parse(response.body)
    end

    # Parsed content of the first <script> whose type contains "ld".
    def json_ld
      @json_ld ||= JSON.parse(doc.search('script[type*=ld]').first.text)
    end

    # Microdata of the page as a nested hash with RDF values mapped by map_rdf_value.
    def rdf
      @rdf ||= map_rdf(
        RDF::Microdata::Reader.new(response.body).to_h
      )
    end

    # Applies map_rdf_value to all keys and values of the hash.
    def map_rdf(tree)
      tree.
        transform_keys { |v| map_rdf_value(v) }.
        transform_values { |v| map_rdf_value(v) }
    end

    # Converts one RDF object into a plain Ruby value; hashes and arrays are
    # mapped recursively, anything else is returned unchanged.
    def map_rdf_value(value)
      case value
      when RDF::Vocabulary::Term then value.fragment
      when RDF::URI then value.to_base
      when RDF::Node then value.id
      when RDF::Literal then value.value
      when Hash then map_rdf(value)
      when Array then value.map { |i| map_rdf_value(i) }
      else
        value
      end
    end
  end
end
118
+
119
+ require_relative './custom'
120
+ require_relative './github_profile'
121
+ require_relative './github_project'
122
+ require_relative './linkedin_profile'
123
+ require_relative './instagram_profile'
124
+ require_relative './xing_profile'
125
+ require_relative './researchgate_profile'
126
+ require_relative './upwork_profile'
127
+ require_relative './facebook_profile'
128
+ require_relative './stackoverflow_profile'
129
+
@@ -0,0 +1,15 @@
1
+ require_relative './network_profile_without_extraction'
2
+
3
module NetworkProfile
  # Facebook links are only recognised; no data is extracted from the page.
  class FacebookProfile < NetworkProfileWithoutExtraction
    self.mdi_icon = 'facebook'

    # Accepts links whose first path segment is longer than three characters
    # and is not "groups".
    #
    # @param link [String]
    # @return [Boolean, nil] nil when the link is not a facebook.com path
    def self.handle?(link)
      slug = link[%r{facebook.com/([\w\.]+)}, 1]
      slug && slug.length > 3 && slug != 'groups'
    end

    # Prefix used by the inherited #title.
    def profile_description
      "Facebook Profil:"
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'active_support/core_ext/hash/indifferent_access'
2
+
3
# Mixin for extractors that read their data from GitHub's GraphQL API.
# The including class has to provide a `query` method returning the GraphQL
# document.
module NetworkProfile::GithubGraphql
  # Sends the GraphQL document to GitHub, authenticated with
  # NetworkProfile.github_api_key.
  #
  # @param query [String] GraphQL document
  # @return [ActiveSupport::HashWithIndifferentAccess] parsed response body
  # @raise [ArgumentError] when the request was not successful
  def query!(query)
    api_response = Typhoeus.post(
      "https://api.github.com/graphql",
      body: { query: query }.to_json,
      headers: { "Authorization": "bearer #{NetworkProfile.github_api_key}" }
    )
    raise ArgumentError, "Fetching query failed: #{api_response.code}" unless api_response.success?

    JSON.parse(api_response.body).with_indifferent_access
  end

  # Memoised API response for the including class' `query`.
  def json
    @json ||= query!(query)
  end

  # HTML access is deliberately unavailable for API-backed profiles.
  def doc
    raise NotImplementedError
  end
end
@@ -0,0 +1,98 @@
1
+ require_relative './github_graphql'
2
+
3
module NetworkProfile
  # GitHub user or organisation profile, read through the GraphQL API.
  class GithubProfile < DefaultProfile
    include GithubGraphql
    self.mdi_icon = 'github'

    # Handles "github.com/<login>" links, but only when an API key is configured.
    def self.handle?(link)
      link.to_s[%r{github.com/[^/]+/?$}] && NetworkProfile.github_api_key
    end

    # GraphQL document that asks for the login both as organisation and as
    # user; whichever exists is used by #profile_data.
    #
    # The login comes from an untrusted link, so it is cut at "?"/"#" and
    # emitted as an escaped string literal instead of being interpolated raw
    # between quotes.
    def query
      username = @link[%r{github.com/([^/?#]+)}, 1]
      login = username.to_s.to_json
      <<~DOC
        query {
          organization(login:#{login}) {
            avatarUrl
            name
            bio: description
            location
            websiteUrl
            ...RepoFragment
          }

          user(login:#{login}) {
            avatarUrl
            name
            bio
            company
            location
            websiteUrl
            followers {
              totalCount
            }
            ...RepoFragment
          }
        }
        fragment RepoFragment on ProfileOwner {
          pinnedItems(first: 9, types: [REPOSITORY]) {
            edges {
              node {
                ... on Repository {
                  nameWithOwner,
                  url,
                  createdAt,
                  updatedAt
                  stargazers { totalCount }
                  watchers {
                    totalCount
                  }
                  primaryLanguage {
                    name
                  }
                }
              }
            }
          }
        }
      DOC
    end

    # Organisation data if present, otherwise user data.
    def profile_data
      json.dig('data', 'organization') || json.dig('data', 'user')
    end

    def title
      profile_data['name']
    end

    def text
      profile_data['bio']
    end

    def image
      profile_data['avatarUrl']
    end

    # @return [Hash] company, location, profile type, follower count,
    #   website and the pinned repositories
    def extra_data
      {
        company: profile_data['company'],
        location: profile_data['location'],
        profile_type: json.dig('data', 'organization') ? "organization" : "user",
        followers: profile_data.dig('followers', 'totalCount'),
        website: profile_data.dig('websiteUrl'),
        # Array() keeps a response without pinned items from raising on nil
        pinned: Array(profile_data.dig('pinnedItems', 'edges')).map { |i|
          n = i['node']
          { name: n['nameWithOwner'],
            url: n['url'],
            created: Time.parse(n['createdAt']).to_date,
            updated: Time.parse(n['updatedAt']).to_date,
            language: n.dig('primaryLanguage', 'name'),
            stars: n['stargazers']['totalCount'],
            watchers: n['watchers']['totalCount'] }
        }
      }
    end
  end
end
@@ -0,0 +1,88 @@
1
+ require_relative './github_graphql'
2
+
3
module NetworkProfile
  # GitHub repository, read through the GraphQL API.
  class GithubProject < DefaultProfile
    include GithubGraphql
    self.mdi_icon = 'github'

    # Captures owner and repository name. The repository part is matched
    # lazily so that an optional ".git" suffix is really stripped (a greedy
    # match swallows it), and it stops at "/", "?" or "#".
    REPO_PATTERN = %r{github.com/([^/]+)/([^/?#]+?)(?:\.git)?(?=[/?#]|\z)}.freeze

    # Handles "github.com/<owner>/<repo>..." links, but only when an API key
    # is configured.
    def self.handle?(link)
      link.to_s[%r{github.com/[^/]+/.+}] && NetworkProfile.github_api_key
    end

    # GraphQL document for the repository named in the link. Owner and name
    # come from an untrusted link and are therefore emitted as escaped string
    # literals.
    def query
      author, repo = @link.match(REPO_PATTERN)&.captures
      <<~DOC
        query {
          repository(name:#{repo.to_s.to_json}, owner: #{author.to_s.to_json}) {
            createdAt
            description
            nameWithOwner
            updatedAt
            languages(first:10) {
              edges {
                node {
                  name
                }
                size
              }
              totalCount
            }
            licenseInfo { name }
            forkCount
            isFork
            defaultBranchRef {
              name
              target {
                ... on Commit {
                  committedDate
                  history(first: 0) {
                    totalCount
                  }
                }
              }
            }
            issues {
              totalCount
            }
            stargazers {
              totalCount
            }
            watchers {
              totalCount
            }
          }
        }
      DOC
    end

    def title
      json.dig('data', 'repository', 'nameWithOwner')
    end

    def text
      json.dig('data', 'repository', 'description')
    end

    # Date of the newest commit on the default branch.
    #
    # @return [Date, nil] nil when the response carries no commit date
    #   (e.g. no default branch)
    def last_commit
      parse_date(json.dig('data', 'repository', 'defaultBranchRef', 'target', 'committedDate'))
    end

    # Repositories have no image of their own.
    def image
      nil
    end

    # @return [Hash] counters, license, creation date, languages sorted by
    #   size (largest first) and the last commit date
    def extra_data
      {
        watchers: json.dig('data', 'repository', 'watchers', 'totalCount'),
        forks: json.dig('data', 'repository', 'forkCount'),
        stars: json.dig('data', 'repository', 'stargazers', 'totalCount'),
        issue_count: json.dig('data', 'repository', 'issues', 'totalCount'),
        commits: json.dig('data', 'repository', 'defaultBranchRef', 'target', 'history', 'totalCount'),
        license: json.dig('data', 'repository', 'licenseInfo', 'name'),
        created: parse_date(json.dig('data', 'repository', 'createdAt')),
        language_bytes: json.dig('data', 'repository', 'languages', 'edges')&.map { |l| [l.dig('node', 'name'), l['size']] }&.sort_by { |_a, b| -b },
        last_commit: last_commit
      }
    end

    private

    # Converts an API timestamp into a Date; nil stays nil instead of raising
    # inside Time.parse.
    def parse_date(timestamp)
      timestamp && Time.parse(timestamp).to_date
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative './network_profile_without_extraction'
2
+
3
module NetworkProfile
  # Instagram links are only recognised; no data is extracted from the page.
  class InstagramProfile < NetworkProfileWithoutExtraction
    self.mdi_icon = 'instagram'

    # Accepts links whose first path segment is longer than three characters
    # and is not "groups".
    #
    # @param link [String]
    # @return [Boolean, nil] nil when the link is not an instagram.com path
    def self.handle?(link)
      account = link[%r{instagram.com/([\w\.]+)}, 1]
      account && account.length > 3 && account != 'groups'
    end

    # Prefix used by the inherited #title.
    def profile_description
      "Instagram Profil:"
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative './network_profile_without_extraction'
2
+
3
module NetworkProfile
  # LinkedIn links are only recognised; no data is extracted from the page.
  class LinkedinProfile < NetworkProfileWithoutExtraction
    self.mdi_icon = 'linkedin'

    class << self
      # @param link [String]
      # @return [String, nil] the matched fragment when the link points to a
      #   personal profile ("/in/"), nil otherwise
      def handle?(link)
        link['linkedin.com/in/']
      end
    end

    # Prefix used by the inherited #title.
    def profile_description
      "LinkedIn Profil:"
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'active_support/core_ext/string/inflections'
2
+ require_relative './default_profile.rb'
3
+
4
module NetworkProfile
  # Base class for networks whose pages are not fetched: the result is built
  # from the link alone.
  class NetworkProfileWithoutExtraction < DefaultProfile
    # Prefix of the generated title; subclasses override it.
    def profile_description
      "Profil: "
    end

    # Description followed by the last path segment of the link.
    def title
      last_segment = @link.split('/').last
      "#{profile_description} #{last_segment}"
    end

    # @return [Hash] same keys as DefaultProfile#data without extra_data;
    #   text is empty and image is nil because nothing is fetched
    def data
      type_name = self.class.name.underscore.split('/').last
      {
        title: title,
        text: "",
        image: nil,
        type: type_name,
        link: @link,
        site_icon: mdi_icon,
      }
    end
  end
end
@@ -0,0 +1,44 @@
1
module NetworkProfile
  # ResearchGate profile, read from the page's JSON-LD, microdata and HTML.
  class ResearchgateProfile < DefaultProfile
    self.mdi_icon = ''

    # @return [String, nil] the matched fragment for profile links
    def self.handle?(link)
      link['researchgate.net/profile/']
    end

    # JSON-LD of the page; when it is wrapped in an "@graph" list the first
    # entry is used.
    def json_ld
      original = super
      if original['@graph']
        original['@graph'].first
      else
        original
      end
    end

    def title
      json_ld['name']
    end

    # Affiliation name from JSON-LD, falling back to the ".org" element.
    def text
      json_ld.dig('affiliation', 'name') || doc.at('.org')&.text
    end

    # Headline and publication date of the first ScholarlyArticle found in
    # the microdata.
    #
    # @return [String, nil] nil when the page lists no such article
    def last_item
      # `find` yields a [key, value] pair or nil; `&.last` keeps the nil so
      # the guard below is actually reached instead of raising on nil.last
      item = rdf.find { |_, v| v['type'].to_s['ScholarlyArticle'] }&.last
      return unless item

      title = item.dig('<http://schema.org/headline>', 0)
      date = item.dig('<http://schema.org/datePublished>', 0)
      "#{title} (#{date})"
    end

    # Reads the first three counters of the profile's stats card as
    # items/reads/citations; all three are nil when the card is missing.
    def extra_data
      card = doc.at(".profile-content-item .nova-c-card")
      items, reads, citations =
        if card
          card.
            search(".nova-o-grid__column").
            map { |col| col.search('.nova-e-text').map(&:text) }.
            map(&:first).map { |i| i.gsub(',', '').to_i }
        else
          []
        end

      { items: items, reads: reads, citations: citations, last_item: last_item }
    end
  end
end
@@ -0,0 +1,81 @@
1
module NetworkProfile
  # Tags:
  # https://api.stackexchange.com/2.2/users/220292/top-tags?pagesize=10&site=stackoverflow

  # Stack Exchange user profile, read through the public Stack Exchange API.
  class StackoverflowProfile < DefaultProfile
    self.mdi_icon = 'stack-overflow'

    # [base url, API site key, display name]
    SITES = [
      ["https://stackoverflow.com", "stackoverflow", "Stack Overflow"],
      ["https://serverfault.com", "serverfault", "Server Fault"],
      ["https://superuser.com", "superuser", "Super User"],
      ["https://webapps.stackexchange.com", "webapps", "Web Applications"],
      ["https://gaming.stackexchange.com", "gaming", "Arqade"],
      ["https://webmasters.stackexchange.com", "webmasters", "Webmasters"],
      ["https://cooking.stackexchange.com", "cooking", "Seasoned Advice"],
      ["https://gamedev.stackexchange.com", "gamedev", "Game Development"],
      ["https://photo.stackexchange.com", "photo", "Photography"],
      ["https://stats.stackexchange.com", "stats", "Cross Validated"],
      ["https://math.stackexchange.com", "math", "Mathematics"],
      ["https://diy.stackexchange.com", "diy", "Home Improvement"],
      ["https://gis.stackexchange.com", "gis", "Geographic Information Systems"],
      ["https://tex.stackexchange.com", "tex", "TeX - LaTeX"],
      ["https://askubuntu.com", "askubuntu", "Ask Ubuntu"],
    ].freeze

    # True when the link is a "/users/" URL on one of the known sites.
    def self.handle?(link)
      SITES.any? { |base_url, _key, _label| link.include?("#{base_url}/users/") }
    end

    # SITES entry whose base url occurs in the link (memoised).
    def site
      @site ||= SITES.find { |base_url, _key, _label| @link.include?(base_url) }
    end

    def title
      user_api['display_name']
    end

    def image
      user_api['profile_image']
    end

    # No description text is provided for these profiles.
    def text
      nil
    end

    # @return [Hash] reputation, creation date, location, site name and logo,
    #   and the top tags as [tag name, answer score + question score] pairs
    def extra_data
      _base_url, site_key, site_label = site
      {
        reputation: user_api.dig('reputation'),
        created: Time.at(user_api['creation_date']).to_date,
        location: user_api.dig('location'),
        site: site_label,
        site_logo: "https://cdn.sstatic.net/Sites/#{site_key}/img/apple-touch-icon.png",
        tags: tags_api.map { |tag| [tag['tag_name'], tag['answer_score'] + tag['question_score']] }
      }
    end

    # Numeric user id taken from the "/users/<id>" part of the link's path.
    def user_id
      URI.parse(@link).path[%r{/users/(\d+)/?}, 1]
    end

    private

    # First item of the users endpoint for this user (memoised).
    def user_api
      @user_api ||= api_call(
        "https://api.stackexchange.com/2.2/users/#{user_id}?order=desc&sort=reputation&site=#{site[1]}"
      ).dig('items', 0)
    end

    # Items of the top-tags endpoint for this user (memoised).
    def tags_api
      @tags_api ||= api_call(
        "https://api.stackexchange.com/2.2/users/#{user_id}/top-tags?pagesize=10&site=#{site[1]}"
      ).dig('items')
    end

    # GETs the url (asking for gzip encoding) and parses the body as JSON.
    def api_call(url)
      api_response = Typhoeus.get(url, accept_encoding: 'gzip')
      JSON.parse(api_response.body)
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'active_support/core_ext/string/filters'
2
+
3
module NetworkProfile
  # Upwork freelancer profile, read from the data object embedded in one of
  # the page's <script> tags.
  class UpworkProfile < DefaultProfile
    self.mdi_icon = 'upwork'

    # @return [String, nil] the matched fragment for user profile links
    def self.handle?(link)
      link[%r{upwork.com/o/profiles/users/.+}]
    end

    def title
      php_vars.dig('profile', 'profile', 'name')
    end

    def text
      doc.at('h2 strong').text
    end

    # Parsed JSON taken from the script that mentions PROFILE_RESPONSE:
    # everything up to "summary:" is cut away, then "}" at line ends is
    # dropped before parsing (memoised).
    def php_vars
      @php_vars ||=
        begin
          script = doc.search('script').find { |node| node && node.text['PROFILE_RESPONSE'] }
          source = script.text
          source.gsub!(/window.PROFILE_RESPONSE=.*summary:/, '')
          JSON.parse(source.gsub(/\}$/, ''))
        end
    end

    # @return [Hash] location, work statistics, hourly rate and skills
    def extra_data
      profile = php_vars.dig('profile')
      stats_rate = profile.dig('stats', 'hourlyRate')
      location = profile.dig('profile', 'location')
      {
        country: "#{location['city']}, #{location['country']}",
        hours: profile.dig('stats', 'totalHours').floor,
        jobs: profile.dig('stats', 'totalJobsWorked').floor,
        rating: profile.dig('stats', 'rating').round(2),
        hourly_rate: "#{stats_rate['amount']} #{stats_rate['currencyCode']}",
        english_level: profile['stats']['englishLevel'],
        hire_again: profile.dig('stats', 'hireAgainPercentage'),
        skills: profile.dig('profile', 'skills').map { |skill| skill['prettyName'] }
      }
    end
  end
end
@@ -0,0 +1,25 @@
1
module NetworkProfile
  # XING profile, read from the page's HTML and JSON-LD.
  class XingProfile < DefaultProfile
    self.mdi_icon = 'xing'

    class << self
      # @return [String, nil] the matched fragment for profile links
      def handle?(link)
        link['xing.com/profile/']
      end
    end

    # Stripped text of the page's first <h1>.
    def title
      heading = doc.at('h1')
      heading.text.strip
    end

    # Job title from JSON-LD.
    def text
      json_ld.dig('jobTitle')
    end

    # @return [Hash] employment status, offered skills ("makesOffer" names)
    #   and language skills as "<name> (<level>)" strings
    def extra_data
      status = doc.at('[data-qa=xing-id-work_experience]')&.text&.split(', ')&.first
      offers = json_ld&.fetch('makesOffer', [])
      offer_names = offers&.map { |offer| offer['name'] } || []
      language_items = doc.at('[data-qa=language-skills-section]')&.search('li')
      {
        employment_status: status,
        tags: offer_names,
        languages: language_items&.map { |item| "#{item.at('h3').text} (#{item.at('div').text})" },
      }
    end
  end
end
@@ -0,0 +1,3 @@
1
module NetworkProfile
  # Gem version; read by the gemspec. Frozen so the constant cannot be
  # mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,31 @@
1
+ require_relative 'lib/network_profile/version'
2
+
3
+ Gem::Specification.new do |spec|
4
+ spec.name = "network_profile"
5
+ spec.version = NetworkProfile::VERSION
6
+ spec.authors = ["Stefan Wienert"]
7
+ spec.email = ["info@stefanwienert.de"]
8
+
9
+ spec.summary = %q{Extract profile metadata from various social-media-profiles}
10
+ spec.description = %q{Extract profile metadata from various social-media-profiles, such as Twitter, XING, Github, Stackoverflow or generic og-metatags.}
11
+ spec.homepage = "https://github.com/pludoni/network_profile"
12
+ spec.license = "MIT"
13
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
+
15
+ spec.metadata["homepage_uri"] = spec.homepage
16
+ spec.metadata["source_code_uri"] = "https://github.com/pludoni/network_profile"
17
+ # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
18
+
19
+ # Specify which files should be added to the gem when it is released.
20
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
21
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
22
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
23
+ end
24
+ spec.bindir = "exe"
25
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
26
+ spec.require_paths = ["lib"]
27
+ spec.add_dependency "typhoeus"
28
+ spec.add_dependency "rdf-microdata"
29
+ spec.add_dependency "activesupport", ">= 5.0.0"
30
+ spec.add_dependency "nokogiri"
31
+ end
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: network_profile
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Stefan Wienert
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-09-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: typhoeus
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdf-microdata
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: activesupport
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 5.0.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 5.0.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Extract profile metadata from various social-media-profiles, such as
70
+ Twitter, XING, Github, Stackoverflow or generic og-metatags.
71
+ email:
72
+ - info@stefanwienert.de
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - ".github/workflows/verify.yml"
78
+ - ".gitignore"
79
+ - ".rspec"
80
+ - Gemfile
81
+ - Gemfile.lock
82
+ - LICENSE.txt
83
+ - README.md
84
+ - Rakefile
85
+ - bin/console
86
+ - bin/rake
87
+ - bin/rspec
88
+ - bin/setup
89
+ - lib/network_profile.rb
90
+ - lib/network_profile/extractor.rb
91
+ - lib/network_profile/extractors/custom.rb
92
+ - lib/network_profile/extractors/default_profile.rb
93
+ - lib/network_profile/extractors/facebook_profile.rb
94
+ - lib/network_profile/extractors/github_graphql.rb
95
+ - lib/network_profile/extractors/github_profile.rb
96
+ - lib/network_profile/extractors/github_project.rb
97
+ - lib/network_profile/extractors/instagram_profile.rb
98
+ - lib/network_profile/extractors/linkedin_profile.rb
99
+ - lib/network_profile/extractors/network_profile_without_extraction.rb
100
+ - lib/network_profile/extractors/researchgate_profile.rb
101
+ - lib/network_profile/extractors/stackoverflow_profile.rb
102
+ - lib/network_profile/extractors/upwork_profile.rb
103
+ - lib/network_profile/extractors/xing_profile.rb
104
+ - lib/network_profile/version.rb
105
+ - network_profile.gemspec
106
+ homepage: https://github.com/pludoni/network_profile
107
+ licenses:
108
+ - MIT
109
+ metadata:
110
+ homepage_uri: https://github.com/pludoni/network_profile
111
+ source_code_uri: https://github.com/pludoni/network_profile
112
+ post_install_message:
113
+ rdoc_options: []
114
+ require_paths:
115
+ - lib
116
+ required_ruby_version: !ruby/object:Gem::Requirement
117
+ requirements:
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: 2.3.0
121
+ required_rubygems_version: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ requirements: []
127
+ rubyforge_project:
128
+ rubygems_version: 2.7.6
129
+ signing_key:
130
+ specification_version: 4
131
+ summary: Extract profile metadata from various social-media-profiles
132
+ test_files: []