network_profile 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/verify.yml +23 -0
- data/.gitignore +10 -0
- data/.rspec +1 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +92 -0
- data/LICENSE.txt +21 -0
- data/README.md +75 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/rake +29 -0
- data/bin/rspec +29 -0
- data/bin/setup +8 -0
- data/lib/network_profile.rb +23 -0
- data/lib/network_profile/extractor.rb +65 -0
- data/lib/network_profile/extractors/custom.rb +9 -0
- data/lib/network_profile/extractors/default_profile.rb +129 -0
- data/lib/network_profile/extractors/facebook_profile.rb +15 -0
- data/lib/network_profile/extractors/github_graphql.rb +24 -0
- data/lib/network_profile/extractors/github_profile.rb +98 -0
- data/lib/network_profile/extractors/github_project.rb +88 -0
- data/lib/network_profile/extractors/instagram_profile.rb +14 -0
- data/lib/network_profile/extractors/linkedin_profile.rb +14 -0
- data/lib/network_profile/extractors/network_profile_without_extraction.rb +25 -0
- data/lib/network_profile/extractors/researchgate_profile.rb +44 -0
- data/lib/network_profile/extractors/stackoverflow_profile.rb +81 -0
- data/lib/network_profile/extractors/upwork_profile.rb +42 -0
- data/lib/network_profile/extractors/xing_profile.rb +25 -0
- data/lib/network_profile/version.rb +3 -0
- data/network_profile.gemspec +31 -0
- metadata +132 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: eaa6afbf057402529a5e99c22ff61b98b1af6bcb8ca03cd869ddf03ba0e6893f
|
|
4
|
+
data.tar.gz: 1cd9b2fe99baba7cbc8f6774d8cd981bffa7e6860349df584d7041e9bb23aeb9
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 93002910a6cb60f7ca0434b2231a583e440e2d4bc9a4e23e3721d021ee6af70acf959f0b012932f65dc175e5e209edba490add5780137d9e6afb9af3ebe6fa7b
|
|
7
|
+
data.tar.gz: 649cea60a5a341322cb3927aac5cd35b59bf374632694d9443c6dc37cfacf00a4afefe42336e350dce742d06c3ee81a3512eaf5407a82e31f10af77c5e86d97d
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: Verify
|
|
2
|
+
on: [push]
|
|
3
|
+
|
|
4
|
+
jobs:
|
|
5
|
+
tests:
|
|
6
|
+
name: Tests
|
|
7
|
+
runs-on: ubuntu-latest
|
|
8
|
+
strategy:
|
|
9
|
+
matrix:
|
|
10
|
+
# ruby: [ '2.5', '2.6', '2.7' ]
|
|
11
|
+
ruby: [ '2.6' ]
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v2
|
|
14
|
+
- uses: ruby/setup-ruby@v1
|
|
15
|
+
with:
|
|
16
|
+
ruby-version: ${{ matrix.ruby }}
|
|
17
|
+
- name: Install gems
|
|
18
|
+
run: |
|
|
19
|
+
bundle config path vendor/bundle
|
|
20
|
+
bundle install --jobs 4 --retry 3
|
|
21
|
+
- name: Run tests
|
|
22
|
+
run: bin/rspec
|
|
23
|
+
|
data/.gitignore
ADDED
data/.rspec
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--require spec_helper
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
network_profile (0.1.0)
|
|
5
|
+
activesupport (>= 5.0.0)
|
|
6
|
+
nokogiri
|
|
7
|
+
rdf-microdata
|
|
8
|
+
typhoeus
|
|
9
|
+
|
|
10
|
+
GEM
|
|
11
|
+
remote: https://rubygems.org/
|
|
12
|
+
specs:
|
|
13
|
+
activesupport (6.0.3.3)
|
|
14
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
15
|
+
i18n (>= 0.7, < 2)
|
|
16
|
+
minitest (~> 5.1)
|
|
17
|
+
tzinfo (~> 1.1)
|
|
18
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
|
19
|
+
addressable (2.7.0)
|
|
20
|
+
public_suffix (>= 2.0.2, < 5.0)
|
|
21
|
+
coderay (1.1.3)
|
|
22
|
+
concurrent-ruby (1.1.7)
|
|
23
|
+
crack (0.4.4)
|
|
24
|
+
diff-lcs (1.4.4)
|
|
25
|
+
ethon (0.12.0)
|
|
26
|
+
ffi (>= 1.3.0)
|
|
27
|
+
ffi (1.13.1)
|
|
28
|
+
hamster (3.0.0)
|
|
29
|
+
concurrent-ruby (~> 1.0)
|
|
30
|
+
hashdiff (1.0.1)
|
|
31
|
+
htmlentities (4.3.4)
|
|
32
|
+
i18n (1.8.5)
|
|
33
|
+
concurrent-ruby (~> 1.0)
|
|
34
|
+
link_header (0.0.8)
|
|
35
|
+
method_source (1.0.0)
|
|
36
|
+
mini_portile2 (2.4.0)
|
|
37
|
+
minitest (5.14.2)
|
|
38
|
+
nokogiri (1.10.10)
|
|
39
|
+
mini_portile2 (~> 2.4.0)
|
|
40
|
+
pry (0.13.1)
|
|
41
|
+
coderay (~> 1.1)
|
|
42
|
+
method_source (~> 1.0)
|
|
43
|
+
public_suffix (4.0.6)
|
|
44
|
+
rake (12.3.3)
|
|
45
|
+
rdf (3.1.6)
|
|
46
|
+
hamster (~> 3.0)
|
|
47
|
+
link_header (~> 0.0, >= 0.0.8)
|
|
48
|
+
rdf-microdata (3.1.1)
|
|
49
|
+
htmlentities (~> 4.3)
|
|
50
|
+
nokogiri (~> 1.10)
|
|
51
|
+
rdf (~> 3.1)
|
|
52
|
+
rdf-xsd (~> 3.1)
|
|
53
|
+
rdf-xsd (3.1.0)
|
|
54
|
+
rdf (~> 3.1)
|
|
55
|
+
rspec (3.9.0)
|
|
56
|
+
rspec-core (~> 3.9.0)
|
|
57
|
+
rspec-expectations (~> 3.9.0)
|
|
58
|
+
rspec-mocks (~> 3.9.0)
|
|
59
|
+
rspec-core (3.9.2)
|
|
60
|
+
rspec-support (~> 3.9.3)
|
|
61
|
+
rspec-expectations (3.9.2)
|
|
62
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
63
|
+
rspec-support (~> 3.9.0)
|
|
64
|
+
rspec-mocks (3.9.1)
|
|
65
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
66
|
+
rspec-support (~> 3.9.0)
|
|
67
|
+
rspec-support (3.9.3)
|
|
68
|
+
thread_safe (0.3.6)
|
|
69
|
+
typhoeus (1.4.0)
|
|
70
|
+
ethon (>= 0.9.0)
|
|
71
|
+
tzinfo (1.2.7)
|
|
72
|
+
thread_safe (~> 0.1)
|
|
73
|
+
vcr (6.0.0)
|
|
74
|
+
webmock (3.9.1)
|
|
75
|
+
addressable (>= 2.3.6)
|
|
76
|
+
crack (>= 0.3.2)
|
|
77
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
|
78
|
+
zeitwerk (2.4.0)
|
|
79
|
+
|
|
80
|
+
PLATFORMS
|
|
81
|
+
ruby
|
|
82
|
+
|
|
83
|
+
DEPENDENCIES
|
|
84
|
+
network_profile!
|
|
85
|
+
pry
|
|
86
|
+
rake (~> 12.0)
|
|
87
|
+
rspec (>= 3.5)
|
|
88
|
+
vcr
|
|
89
|
+
webmock
|
|
90
|
+
|
|
91
|
+
BUNDLED WITH
|
|
92
|
+
2.1.4
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2020 Stefan Wienert
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# NetworkProfile
|
|
2
|
+
|
|
3
|
+
Extractor gem that scans arbitrary strings for links to a user's network profiles. For example, when a user uploads a PDF, its text can be scanned for all references to social network profiles.
|
|
4
|
+
|
|
5
|
+
This work is extracted from the German Applicant Tracking System EBMS (https://bms.empfehlungsbund.de).
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
Add this line to your application's Gemfile:
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
gem 'network_profile'
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
And then execute:
|
|
16
|
+
|
|
17
|
+
$ bundle install
|
|
18
|
+
|
|
19
|
+
Or install it yourself as:
|
|
20
|
+
|
|
21
|
+
$ gem install network_profile
|
|
22
|
+
|
|
23
|
+
## Usage
|
|
24
|
+
|
|
25
|
+
### Parse and extract one link
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
extraction = NetworkProfile.parse('https://github.com/zealot128', include_fallback_custom: true)
|
|
29
|
+
|
|
30
|
+
- ``include_fallback_custom: true`` uses the default extractor (og/meta-tags) if no other more specific extractor is found
|
|
31
|
+
- ``include_fallback_custom: false`` only uses the specific website extractors and returns nil if none matches the link
|
|
32
|
+
|
|
33
|
+
### Scan a whole long string for links
|
|
34
|
+
|
|
35
|
+
links = NetworkProfile::Extractor.call("Very long String with even broken links in it www . github . com/zealot128")
|
|
36
|
+
|
|
37
|
+
### Config
|
|
38
|
+
|
|
39
|
+
NetworkProfile.headers = {
|
|
40
|
+
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
41
|
+
'Accept-Language' => 'de,en-US;q=0.7,en;q=0.3',
|
|
42
|
+
'Referer' => 'https://www.google.com',
|
|
43
|
+
'DNT' => '1',
|
|
44
|
+
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:73.0) Gecko/20100101 Firefox/73.0',
|
|
45
|
+
}
|
|
46
|
+
NetworkProfile.github_api_key = nil
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Extractor
|
|
50
|
+
|
|
51
|
+
The following network profiles are supported:
|
|
52
|
+
|
|
53
|
+
**GithubProfile/Company, GithubProject**:
|
|
54
|
+
|
|
55
|
+
- Uses GitHub's GraphQL API (thus an API key is required)
|
|
56
|
+
|
|
57
|
+
**Instagram** **Facebook** **Linkedin**
|
|
58
|
+
|
|
59
|
+
- Because those websites are closed and defensive as hell, there is no extraction, just a simple matching (e.g. "Facebook profile")
|
|
60
|
+
|
|
61
|
+
**Stackoverflow**
|
|
62
|
+
|
|
63
|
+
- Uses SO's API
|
|
64
|
+
|
|
65
|
+
**Upwork** **XING** **ResearchGate**
|
|
66
|
+
|
|
67
|
+
- Custom Website Scraper/Extract JSON+LD
|
|
68
|
+
|
|
69
|
+
**Default Fallback** (Custom)
|
|
70
|
+
|
|
71
|
+
- OG-Meta-Tags / HTML-Meta-Tags
|
|
72
|
+
|
|
73
|
+
## License
|
|
74
|
+
|
|
75
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: sets up the Bundler environment, loads the gem and
# then opens an interactive IRB session.
require "bundler/setup"
require "network_profile"

# Fixtures and/or initialization code can be added at this point to make
# experimenting with the gem easier.
#
# A different console may be used instead of IRB, for example Pry
# (remember to add pry to the Gemfile first):
#   require "pry"
#   Pry.start

require "irb"
IRB.start(__FILE__)
|
data/bin/rake
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'rake' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"

# Point Bundler at the project's Gemfile (resolved from the real location of
# this binstub) unless the caller already selected one.
unless ENV["BUNDLE_GEMFILE"]
  ENV["BUNDLE_GEMFILE"] = File.expand_path("../../Gemfile",
                                           Pathname.new(__FILE__).realpath)
end

bundle_binstub = File.expand_path("../bundle", __FILE__)

# When a sibling bin/bundle exists it is loaded first, but only if its first
# 300 bytes carry Bundler's generated-file marker; otherwise the script aborts.
if File.file?(bundle_binstub)
  unless File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end

  load(bundle_binstub)
end

require "rubygems"
require "bundler/setup"

# Hand control to the real rake executable shipped with the rake gem.
load Gem.bin_path("rake", "rake")
|
data/bin/rspec
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'rspec' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"

# Point Bundler at the project's Gemfile (resolved from the real location of
# this binstub) unless the caller already selected one.
unless ENV["BUNDLE_GEMFILE"]
  ENV["BUNDLE_GEMFILE"] = File.expand_path("../../Gemfile",
                                           Pathname.new(__FILE__).realpath)
end

bundle_binstub = File.expand_path("../bundle", __FILE__)

# When a sibling bin/bundle exists it is loaded first, but only if its first
# 300 bytes carry Bundler's generated-file marker; otherwise the script aborts.
if File.file?(bundle_binstub)
  unless File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end

  load(bundle_binstub)
end

require "rubygems"
require "bundler/setup"

# Hand control to the real rspec executable shipped with the rspec-core gem.
load Gem.bin_path("rspec-core", "rspec")
|
data/bin/setup
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require "network_profile/version"
|
|
2
|
+
require 'network_profile/extractor'
|
|
3
|
+
require 'network_profile/extractors/default_profile'
|
|
4
|
+
require 'active_support/core_ext/module/attribute_accessors'
|
|
5
|
+
|
|
6
|
+
# Top-level namespace of the gem. Holds the module-wide configuration
# (+headers+, +github_api_key+) and the +parse+ convenience entry point.
module NetworkProfile
  # Base error class of the gem.
  class Error < StandardError; end

  # Module-level accessors for the configuration values.
  mattr_accessor :headers
  mattr_accessor :github_api_key

  # No GitHub API key by default.
  self.github_api_key = nil

  # Default, browser-like request headers.
  self.headers = {
    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language' => 'de,en-US;q=0.7,en;q=0.3',
    'Referer' => 'https://www.google.com',
    'DNT' => '1',
    'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:73.0) Gecko/20100101 Firefox/73.0',
  }

  class << self
    # Delegates to NetworkProfile::DefaultProfile.parse.
    #
    # @param link [String] the URL to analyse
    # @param include_fallback_custom [Boolean] forwarded unchanged
    # @return whatever DefaultProfile.parse returns for the link
    def parse(link, include_fallback_custom: false)
      NetworkProfile::DefaultProfile.parse(link, include_fallback_custom: include_fallback_custom)
    end
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
require 'active_support/core_ext/string/filters'
|
|
2
|
+
module NetworkProfile
  # Finds URLs in a free-form string (tolerating some broken notations such as
  # "www . example . com/path") and runs each of them through
  # NetworkProfile.parse.
  class Extractor
    # Logic from:
    # https://github.com/tenderlove/rails_autolink/blob/master/lib/rails_autolink/helpers.rb
    AUTO_LINK_RE = %r{
      (?: ((?:ed2k|ftp|http|https|irc|mailto|news|gopher|nntp|telnet|webcal|xmpp|callto|feed|svn|urn|aim|rsync|tag|ssh|sftp|rtsp|afs|file):)// | www\. )
      [^\s<\u00A0"]+
    }ix.freeze
    WORD_PATTERN = '\p{Word}'.freeze
    BRACKETS = { ']' => '[', ')' => '(', '}' => '{' }.freeze

    TLD = /(?<tld>com|de|net|fr|at|ch|info)/.freeze
    HOST_PART = %r{(?<host>[a-z\-\.0-9]+)}.freeze

    # A single trailing character that is treated as punctuation and removed
    # from the end of a link.
    # NOTE(review): inside the character class `/-=` is a range (from "/" to
    # "="), not three literal characters - kept unchanged from the original.
    TRAILING_PUNCTUATION_RE = %r{[^#{WORD_PATTERN}/-=&]$}.freeze

    # " host.tld/" written without a scheme.
    BARE_HOST_RE = %r{ (#{HOST_PART}\.#{TLD}/)}.freeze

    # " www . host . tld/path" with stray spaces around the dots.
    SPACED_WWW_RE = %r{ www *\. +#{HOST_PART} *\. *#{TLD}(?<path>[^<\u00A0"]+)}.freeze

    # Convenience wrapper: extracts and parses every link found in +string+.
    #
    # @param string [String, nil] text to scan
    # @return [Array] non-nil results of NetworkProfile.parse, one per link
    def self.call(string)
      new(string).extracted_links!
    end

    # @param string [String, nil] text to scan; nil is treated as empty text
    def initialize(string)
      @string = string.to_s
    end

    # Parses every detected link. A link whose parsing raises a StandardError
    # is reported on stderr and skipped, so one bad link does not abort the
    # whole extraction.
    #
    # @return [Array] non-nil results of NetworkProfile.parse
    def extracted_links!
      extracted = links.map do |link|
        NetworkProfile.parse(link)
      rescue StandardError => e
        # Library code must not write debug output to stdout.
        warn "NetworkProfile::Extractor: skipping #{link.inspect} (#{e.class}: #{e.message})"
        nil
      end
      extracted.compact
    end

    # All distinct links found in the text, in order of first appearance.
    # Links without a scheme ("www.…") are prefixed with "https://".
    # The result is memoized; every call returns the same de-duplicated list.
    #
    # @return [Array<String>]
    def links
      return @links if @links

      found = []
      mapped_string.scan(AUTO_LINK_RE) do
        # Read the match data first: the helper below runs further regexes.
        scheme = Regexp.last_match(1)
        href = strip_trailing_punctuation(Regexp.last_match(0))
        href = "https://#{href}" unless scheme
        found << href
      end
      @links = found.uniq
    end

    # The input text with two kinds of sloppy link notation rewritten so that
    # AUTO_LINK_RE can find them: scheme-less "host.tld/" gets an "https://"
    # prefix, and "www . host . tld/path" has its spaces removed.
    #
    # @return [String]
    def mapped_string
      with_schemes = @string.gsub(BARE_HOST_RE) do
        match = Regexp.last_match
        "https://#{match[:host]}.#{match[:tld]}/"
      end

      with_schemes.gsub(SPACED_WWW_RE) do
        match = Regexp.last_match
        "www.#{match[:host]}.#{match[:tld]}#{match[:path].delete(' ')}"
      end
    end

    private

    # Removes trailing punctuation from +href+ (in place), one character at a
    # time. A closing bracket is put back when the link contains more matching
    # opening brackets than closing ones, i.e. when it belongs to the URL.
    def strip_trailing_punctuation(href)
      punctuation = []
      while href.sub!(TRAILING_PUNCTUATION_RE, '')
        punctuation.push(Regexp.last_match(0))
        opening = BRACKETS[punctuation.last]
        if opening && href.scan(opening).size > href.scan(punctuation.last).size
          href << punctuation.pop
          break
        end
      end
      href
    end
  end
end
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
require 'rdf/microdata'
|
|
2
|
+
require 'active_support/descendants_tracker'
|
|
3
|
+
require 'active_support/core_ext/module/attribute_accessors'
|
|
4
|
+
require 'active_support/core_ext/string/inflections'
|
|
5
|
+
require 'nokogiri'
|
|
6
|
+
require 'typhoeus'
|
|
7
|
+
|
|
8
|
+
module NetworkProfile
  # Base class of all profile extractors. Used directly it reads generic
  # OpenGraph / HTML meta information from the page behind a link; subclasses
  # override +handle?+ and the individual data methods for specific sites.
  class DefaultProfile
    # NOTE(review): ActiveSupport documents DescendantsTracker as a module to
    # `extend`; included like this it presumably has no effect - confirm.
    include ActiveSupport::DescendantsTracker

    class << self
      # Not read anywhere in this class; requests use NetworkProfile.headers.
      attr_accessor :headers

      # Sets the icon name for this class (and for subclasses that do not set
      # their own).
      attr_writer :mdi_icon

      # Icon name of this class, falling back to the nearest ancestor that
      # defines one. Stored per class: the previous `cattr_accessor` kept one
      # class variable shared by the whole hierarchy, so every subclass's
      # `self.mdi_icon = ...` overwrote the icon of all other profiles.
      def mdi_icon
        return @mdi_icon if instance_variable_defined?(:@mdi_icon)

        superclass.mdi_icon if superclass.respond_to?(:mdi_icon)
      end
    end

    # Site-specific extractors, in matching priority order (the first class
    # whose +handle?+ accepts a link wins).
    def self.auto_extractor_link_types
      [
        NetworkProfile::GithubProfile,
        NetworkProfile::GithubProject,
        NetworkProfile::LinkedinProfile,
        NetworkProfile::InstagramProfile,
        NetworkProfile::XingProfile,
        NetworkProfile::ResearchgateProfile,
        NetworkProfile::UpworkProfile,
        NetworkProfile::FacebookProfile,
        NetworkProfile::StackoverflowProfile,
      ].freeze
    end

    # Site-specific extractors followed by the generic Custom fallback.
    def self.all_types
      auto_extractor_link_types + [NetworkProfile::Custom]
    end

    # Picks the first extractor that handles +link+ and returns its data.
    #
    # @param link [String, nil] URL to analyse; surrounding whitespace is ignored
    # @param include_fallback_custom [Boolean] also consider NetworkProfile::Custom
    # @return [Hash, nil] extracted data, or nil when the link is blank or no
    #   extractor handles it
    def self.parse(link, include_fallback_custom: false)
      # Normalize first, so that +handle?+ sees exactly the link the extractor
      # is going to work with.
      link = link.to_s.strip
      return if link.empty?

      candidates = include_fallback_custom ? all_types : auto_extractor_link_types
      link_type = candidates.find { |type| type.handle?(link) }
      return unless link_type

      link_type.new(link).data
    end

    # @param link [String] URL of the profile
    def initialize(link)
      @link = link
    end

    # Instance-level reader for the class's icon name.
    def mdi_icon
      self.class.mdi_icon
    end

    # Instance-level writer; sets the icon name on the class (kept for
    # compatibility with the accessor pair `cattr_accessor` used to define).
    def mdi_icon=(value)
      self.class.mdi_icon = value
    end

    # og:image of the page; a path starting with "/" is resolved against the
    # profile link.
    def image
      img = doc.at('meta[property=og\:image]')&.[]('content')
      img = URI.join(@link, img).to_s if img && img[%r{^/\w+}]
      img
    end

    # Text of the page's <title> element, if any.
    def title
      doc.at('title')&.text
    end

    # og:description, falling back to the plain meta description.
    def text
      doc.at('meta[property=og\:description]')&.[]('content') || doc.at('meta[name=description]')&.[]('content')
    end

    # The common result hash, merged with the extractor-specific +extra_data+.
    def data
      {
        site_icon: mdi_icon,
        link: @link,
        title: title,
        text: text,
        image: image,
        type: self.class.name.underscore.split('/').last
      }.merge(extra_data)
    end

    # Hook for subclasses: additional keys merged into +data+.
    def extra_data
      {}
    end

    private

    # Memoized HTTP response for the link (redirects are followed).
    def response
      @response ||= Typhoeus.get(@link, headers: NetworkProfile.headers, followlocation: true)
    end

    # Memoized Nokogiri document of the response body.
    def doc
      @doc ||= Nokogiri.parse(response.body)
    end

    # Parsed content of the first <script> whose type contains "ld".
    def json_ld
      @json_ld ||= JSON.parse(doc.search('script[type*=ld]').first.text)
    end

    # Microdata of the page as a nested Hash of plain Ruby values.
    def rdf
      @rdf ||= map_rdf(
        RDF::Microdata::Reader.new(response.body).to_h
      )
    end

    # Converts keys and values of an RDF hash via +map_rdf_value+.
    def map_rdf(tree)
      tree.
        transform_keys { |v| map_rdf_value(v) }.
        transform_values { |v| map_rdf_value(v) }
    end

    # Maps a single RDF object to a plain value; hashes and arrays are
    # converted recursively, anything unknown is returned unchanged.
    def map_rdf_value(value)
      case value
      when RDF::Vocabulary::Term then value.fragment
      when RDF::URI then value.to_base
      when RDF::Node then value.id
      when RDF::Literal then value.value
      when Hash then map_rdf(value)
      when Array then value.map { |i| map_rdf_value(i) }
      else
        value
      end
    end
  end
end
|
|
118
|
+
|
|
119
|
+
require_relative './custom'
|
|
120
|
+
require_relative './github_profile'
|
|
121
|
+
require_relative './github_project'
|
|
122
|
+
require_relative './linkedin_profile'
|
|
123
|
+
require_relative './instagram_profile'
|
|
124
|
+
require_relative './xing_profile'
|
|
125
|
+
require_relative './researchgate_profile'
|
|
126
|
+
require_relative './upwork_profile'
|
|
127
|
+
require_relative './facebook_profile'
|
|
128
|
+
require_relative './stackoverflow_profile'
|
|
129
|
+
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
require_relative './network_profile_without_extraction'
|
|
2
|
+
|
|
3
|
+
module NetworkProfile
  # Recognizes Facebook profile links. No page content is fetched; the data
  # comes from NetworkProfileWithoutExtraction.
  class FacebookProfile < NetworkProfileWithoutExtraction
    self.mdi_icon = 'facebook'

    # A link is handled when the first path segment after "facebook.com/"
    # consists of word characters / dots, is longer than three characters and
    # is not "groups".
    #
    # @param link [String, nil]
    # @return [Boolean, nil] truthy when the link looks like a profile
    def self.handle?(link)
      # The dot of the host name is escaped so it only matches a literal ".".
      name = link.to_s[%r{facebook\.com/([\w.]+)}, 1]
      name && name.length > 3 && name != 'groups'
    end

    # Prefix used for the generated title.
    def profile_description
      "Facebook Profil:"
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'active_support/core_ext/hash/indifferent_access'
|
|
2
|
+
|
|
3
|
+
# Mixin for extractors that read their data from GitHub's GraphQL API instead
# of scraping HTML. The including class has to provide a +query+ method.
module NetworkProfile::GithubGraphql
  # Posts +query+ to the GraphQL endpoint, authenticated with
  # NetworkProfile.github_api_key.
  #
  # @param query [String] GraphQL document
  # @return [ActiveSupport::HashWithIndifferentAccess] parsed response body
  # @raise [ArgumentError] when the HTTP request was not successful
  def query!(query)
    payload = { query: query }.to_json
    auth_headers = { "Authorization": "bearer #{NetworkProfile.github_api_key}" }
    r = Typhoeus.post("https://api.github.com/graphql", body: payload, headers: auth_headers)

    raise ArgumentError, "Fetching query failed: #{r.code}" unless r.success?

    JSON.parse(r.body).with_indifferent_access
  end

  # Memoized result of running the including class's +query+.
  def json
    @json ||= query!(query)
  end

  # There is no HTML document for API-backed extractors.
  def doc
    raise NotImplementedError
  end
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
require_relative './github_graphql'
|
|
2
|
+
|
|
3
|
+
module NetworkProfile
  # Extractor for GitHub user and organization profiles, backed by the GraphQL
  # API (see GithubGraphql). Only active when a GitHub API key is configured.
  class GithubProfile < DefaultProfile
    include GithubGraphql
    self.mdi_icon = 'github'

    # Handles links of the form "github.com/<name>" (optional trailing slash),
    # provided an API key is set.
    #
    # @param link [#to_s]
    # @return truthy when the link is handled
    def self.handle?(link)
      link.to_s[%r{github\.com/[^/]+/?$}] && NetworkProfile.github_api_key
    end

    # GraphQL document asking for the login both as organization and as user;
    # whichever exists is used by +profile_data+.
    #
    # @return [String]
    def query
      # The login comes from an arbitrary link, so it is cut at "?" / "#" and
      # embedded as an escaped (JSON-encoded) string literal instead of being
      # pasted between quotes verbatim.
      login = @link[%r{github\.com/([^/?#]+)}, 1].to_s.to_json
      <<~DOC
        query {
          organization(login: #{login}) {
            avatarUrl
            name
            bio: description
            location
            websiteUrl
            ...RepoFragment
          }

          user(login: #{login}) {
            avatarUrl
            name
            bio
            company
            location
            websiteUrl
            followers {
              totalCount
            }
            ...RepoFragment
          }
        }
        fragment RepoFragment on ProfileOwner {
          pinnedItems(first: 9, types: [REPOSITORY]) {
            edges {
              node {
                ... on Repository {
                  nameWithOwner,
                  url,
                  createdAt,
                  updatedAt
                  stargazers { totalCount }
                  watchers {
                    totalCount
                  }
                  primaryLanguage {
                    name
                  }
                }
              }
            }
          }
        }
      DOC
    end

    # The organization node if present, otherwise the user node (nil when the
    # response contains neither).
    def profile_data
      json.dig('data', 'organization') || json.dig('data', 'user')
    end

    def title
      profile_data['name']
    end

    def text
      profile_data['bio']
    end

    def image
      profile_data['avatarUrl']
    end

    # Profile details plus the pinned repositories.
    def extra_data
      {
        company: profile_data['company'],
        location: profile_data['location'],
        profile_type: json.dig('data', 'organization') ? "organization" : "user",
        followers: profile_data.dig('followers', 'totalCount'),
        website: profile_data['websiteUrl'],
        pinned: profile_data.dig('pinnedItems', 'edges').map { |edge|
          repo = edge['node']
          { name: repo['nameWithOwner'],
            url: repo['url'],
            created: Time.parse(repo['createdAt']).to_date,
            updated: Time.parse(repo['updatedAt']).to_date,
            language: repo.dig('primaryLanguage', 'name'),
            stars: repo['stargazers']['totalCount'],
            watchers: repo['watchers']['totalCount'] }
        }
      }
    end
  end
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
require_relative './github_graphql'
|
|
2
|
+
|
|
3
|
+
module NetworkProfile
  # Extractor for GitHub repositories, backed by the GraphQL API (see
  # GithubGraphql). Only active when a GitHub API key is configured.
  class GithubProject < DefaultProfile
    include GithubGraphql
    self.mdi_icon = 'github'

    # Owner and repository name; an optional ".git" suffix is not part of the
    # name. The repository segment is matched lazily - a greedy segment would
    # swallow ".git" before the optional suffix group could strip it.
    REPO_RE = %r{github\.com/([^/]+)/([^/?#]+?)(?:\.git)?(?:[/?#]|\z)}.freeze

    # Handles links of the form "github.com/<owner>/<anything>", provided an
    # API key is set.
    #
    # @param link [#to_s]
    # @return truthy when the link is handled
    def self.handle?(link)
      link.to_s[%r{github\.com/[^/]+/.+}] && NetworkProfile.github_api_key
    end

    # GraphQL document for the repository named in the link.
    #
    # @return [String]
    def query
      _, author, repo = @link.match(REPO_RE).to_a
      # Both values come from an arbitrary link, so they are embedded as
      # escaped (JSON-encoded) string literals.
      <<~DOC
        query {
          repository(name: #{repo.to_s.to_json}, owner: #{author.to_s.to_json}) {
            createdAt
            description
            nameWithOwner
            updatedAt
            languages(first:10) {
              edges {
                node {
                  name
                }
                size
              }
              totalCount
            }
            licenseInfo { name }
            forkCount
            isFork
            defaultBranchRef {
              name
              target {
                ... on Commit {
                  committedDate
                  history(first: 0) {
                    totalCount
                  }
                }
              }
            }
            issues {
              totalCount
            }
            stargazers {
              totalCount
            }
            watchers {
              totalCount
            }
          }
        }
      DOC
    end

    def title
      json.dig('data', 'repository', 'nameWithOwner')
    end

    def text
      json.dig('data', 'repository', 'description')
    end

    # Date of the latest commit on the default branch, or nil when the
    # response carries no such commit.
    def last_commit
      parse_date(json.dig('data', 'repository', 'defaultBranchRef', 'target', 'committedDate'))
    end

    # Repositories have no profile image.
    def image
      nil
    end

    # Repository statistics. +language_bytes+ is a list of [name, size] pairs,
    # largest first.
    def extra_data
      {
        watchers: json.dig('data', 'repository', 'watchers', 'totalCount'),
        forks: json.dig('data', 'repository', 'forkCount'),
        stars: json.dig('data', 'repository', 'stargazers', 'totalCount'),
        issue_count: json.dig('data', 'repository', 'issues', 'totalCount'),
        commits: json.dig('data', 'repository', 'defaultBranchRef', 'target', 'history', 'totalCount'),
        license: json.dig('data', 'repository', 'licenseInfo', 'name'),
        created: parse_date(json.dig('data', 'repository', 'createdAt')),
        language_bytes: json.dig('data', 'repository', 'languages', 'edges')&.map { |l| [l.dig('node', 'name'), l['size']] }&.sort_by { |_a, b| -b },
        last_commit: last_commit
      }
    end

    private

    # Converts an API timestamp to a Date; a missing value stays nil instead
    # of making Time.parse raise.
    def parse_date(timestamp)
      timestamp && Time.parse(timestamp).to_date
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
require_relative './network_profile_without_extraction'
|
|
2
|
+
|
|
3
|
+
module NetworkProfile
  # Recognizes Instagram profile links. No page content is fetched; the data
  # comes from NetworkProfileWithoutExtraction.
  class InstagramProfile < NetworkProfileWithoutExtraction
    self.mdi_icon = 'instagram'

    # A link is handled when the first path segment after "instagram.com/"
    # consists of word characters / dots, is longer than three characters and
    # is not "groups".
    #
    # @param link [String, nil]
    # @return [Boolean, nil] truthy when the link looks like a profile
    def self.handle?(link)
      # The dot of the host name is escaped so it only matches a literal ".".
      name = link.to_s[%r{instagram\.com/([\w.]+)}, 1]
      name && name.length > 3 && name != 'groups'
    end

    # Prefix used for the generated title.
    def profile_description
      "Instagram Profil:"
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
require_relative './network_profile_without_extraction'
|
|
2
|
+
|
|
3
|
+
module NetworkProfile
  # Recognizes LinkedIn profile links. No page content is fetched; the data
  # comes from NetworkProfileWithoutExtraction.
  class LinkedinProfile < NetworkProfileWithoutExtraction
    self.mdi_icon = 'linkedin'

    class << self
      # Truthy (the matched substring) when the link contains the LinkedIn
      # profile path, nil otherwise.
      def handle?(link)
        link.slice('linkedin.com/in/')
      end
    end

    # Prefix used for the generated title.
    def profile_description
      "LinkedIn Profil:"
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require 'active_support/core_ext/string/inflections'
|
|
2
|
+
require_relative './default_profile.rb'
|
|
3
|
+
|
|
4
|
+
module NetworkProfile
  # Base class for networks whose pages are not fetched at all: every value
  # is derived from the link itself.
  class NetworkProfileWithoutExtraction < DefaultProfile
    # Title prefix; subclasses override it with a network-specific label.
    def profile_description
      "Profil: "
    end

    # The description followed by the last "/"-separated segment of the link.
    def title
      last_segment = @link.split('/').last
      "#{profile_description} #{last_segment}"
    end

    # Result hash with the same keys DefaultProfile#data produces, but with
    # empty text and no image.
    def data
      type_name = self.class.name.underscore.split('/').last

      {
        title: title,
        text: "",
        image: nil,
        type: type_name,
        link: @link,
        site_icon: mdi_icon,
      }
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
module NetworkProfile
  # Extractor for ResearchGate profiles. Reads the page's JSON-LD block, its
  # microdata and a few elements of the HTML.
  class ResearchgateProfile < DefaultProfile
    self.mdi_icon = ''

    # Truthy when the link contains the ResearchGate profile path.
    def self.handle?(link)
      link['researchgate.net/profile/']
    end

    # The JSON-LD object; when it is wrapped in an "@graph" list, the first
    # entry of that list.
    def json_ld
      original = super
      graph = original['@graph']
      graph ? graph.first : original
    end

    def title
      json_ld['name']
    end

    # Affiliation name from JSON-LD, falling back to the text of the first
    # ".org" element.
    def text
      json_ld.dig('affiliation', 'name') || doc.at('.org')&.text
    end

    # "<headline> (<date published>)" of the first microdata item whose type
    # mentions ScholarlyArticle, or nil when the page lists none.
    def last_item
      # Destructuring instead of `.last`: +find+ returns nil when nothing
      # matches, and nil has no +last+.
      _, item = rdf.find { |_, v| v['type'].to_s['ScholarlyArticle'] }
      return unless item

      title = item.dig('<http://schema.org/headline>', 0)
      date = item.dig('<http://schema.org/datePublished>', 0)
      "#{title} (#{date})"
    end

    # The first three column values of the profile's stats card (taken as
    # items, reads, citations - in that order) plus +last_item+. Values are
    # nil when the card or a column's text is missing.
    def extra_data
      card = doc.at(".profile-content-item .nova-c-card")
      columns = card ? card.search(".nova-o-grid__column").to_a : []

      items, reads, citations = columns.map do |col|
        raw = col.at('.nova-e-text')&.text
        # Strip thousands separators ("1,234") before converting.
        raw && raw.delete(',').to_i
      end

      { items: items, reads: reads, citations: citations, last_item: last_item }
    end
  end
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
module NetworkProfile
|
|
2
|
+
# Tags:
|
|
3
|
+
# https://api.stackexchange.com/2.2/users/220292/top-tags?pagesize=10&site=stackoverflow
|
|
4
|
+
|
|
5
|
+
class StackoverflowProfile < DefaultProfile
|
|
6
|
+
self.mdi_icon = 'stack-overflow'
|
|
7
|
+
SITES = [
|
|
8
|
+
["https://stackoverflow.com", "stackoverflow", "Stack Overflow"],
|
|
9
|
+
["https://serverfault.com", "serverfault", "Server Fault"],
|
|
10
|
+
["https://superuser.com", "superuser", "Super User"],
|
|
11
|
+
["https://webapps.stackexchange.com", "webapps", "Web Applications"],
|
|
12
|
+
["https://gaming.stackexchange.com", "gaming", "Arqade"],
|
|
13
|
+
["https://webmasters.stackexchange.com", "webmasters", "Webmasters"],
|
|
14
|
+
["https://cooking.stackexchange.com", "cooking", "Seasoned Advice"],
|
|
15
|
+
["https://gamedev.stackexchange.com", "gamedev", "Game Development"],
|
|
16
|
+
["https://photo.stackexchange.com", "photo", "Photography"],
|
|
17
|
+
["https://stats.stackexchange.com", "stats", "Cross Validated"],
|
|
18
|
+
["https://math.stackexchange.com", "math", "Mathematics"],
|
|
19
|
+
["https://diy.stackexchange.com", "diy", "Home Improvement"],
|
|
20
|
+
["https://gis.stackexchange.com", "gis", "Geographic Information Systems"],
|
|
21
|
+
["https://tex.stackexchange.com", "tex", "TeX - LaTeX"],
|
|
22
|
+
["https://askubuntu.com", "askubuntu", "Ask Ubuntu"],
|
|
23
|
+
].freeze
|
|
24
|
+
|
|
25
|
+
def self.handle?(link)
|
|
26
|
+
SITES.any? { |s, _, _| link.include?(s + "/users/") }
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def site
|
|
30
|
+
@site ||= SITES.find { |s, _, _| @link.include?(s) }
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def title
|
|
34
|
+
user_api['display_name']
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def image
|
|
38
|
+
user_api['profile_image']
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def text
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def extra_data
|
|
45
|
+
{
|
|
46
|
+
reputation: user_api.dig('reputation'),
|
|
47
|
+
created: Time.at(user_api['creation_date']).to_date,
|
|
48
|
+
location: user_api.dig('location'),
|
|
49
|
+
site: site[2],
|
|
50
|
+
site_logo: "https://cdn.sstatic.net/Sites/#{site[1]}/img/apple-touch-icon.png",
|
|
51
|
+
tags: tags_api.map { |j| [j['tag_name'], j['answer_score'] + j['question_score']] }
|
|
52
|
+
}
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Extracts the numeric user id from the "/users/<digits>" segment of @link.
#
# The id is read straight from the link text instead of going through
# URI.parse, which raises URI::InvalidURIError for links that contain
# spaces or non-ASCII characters. Anything from the first "?" or "#"
# onwards is ignored, so only the part before the query can match.
#
# @return [String, nil] the digits following "/users/", or nil when absent
def user_id
  before_query = @link.to_s.split(/[?#]/, 2).first.to_s
  before_query[%r{/users/(\d+)/?}, 1]
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
# Fetches (and memoizes) the user record from the Stack Exchange API.
#
# The request addresses +user_id+ on the site named by the second element
# of the +site+ row.
#
# @return [Hash, nil] first element of the response's 'items' list
def user_api
  return @user_api if @user_api

  endpoint = "https://api.stackexchange.com/2.2/users/#{user_id}?order=desc&sort=reputation&site=#{site[1]}"
  @user_api = api_call(endpoint).dig('items', 0)
end
|
|
68
|
+
|
|
69
|
+
# Fetches (and memoizes) the user's top tags from the Stack Exchange API,
# asking for a page size of 10.
#
# @return [Array, nil] the response's 'items' value
def tags_api
  return @tags_api if @tags_api

  endpoint = "https://api.stackexchange.com/2.2/users/#{user_id}/top-tags?pagesize=10&site=#{site[1]}"
  @tags_api = api_call(endpoint).dig('items')
end
|
|
76
|
+
|
|
77
|
+
# Performs a GET request (passing accept_encoding: 'gzip' to Typhoeus) and
# parses the response body as JSON.
#
# @param url [String] request URL
# @return [Object] the parsed JSON document
def api_call(url)
  response = Typhoeus.get(url, accept_encoding: 'gzip')
  JSON.parse(response.body)
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
require 'active_support/core_ext/string/filters'
|
|
2
|
+
|
|
3
|
+
module NetworkProfile
  # Extractor for Upwork profile pages.
  #
  # Profile data is read from the JSON embedded in the page's inline script
  # that mentions PROFILE_RESPONSE; the headline is read from the markup.
  class UpworkProfile < DefaultProfile
    self.mdi_icon = 'upwork'

    # @param link [String] candidate profile URL
    # @return [String, nil] the matched part of the link (truthy) when it
    #   looks like an Upwork user profile URL, nil otherwise
    def self.handle?(link)
      link[%r{upwork\.com/o/profiles/users/.+}]
    end

    # @return [String, nil] the name stored in the embedded profile data
    def title
      php_vars.dig('profile', 'profile', 'name')
    end

    # @return [String, nil] text of the first "h2 strong" node, or nil when
    #   the page has no such node
    def text
      doc.at('h2 strong')&.text
    end

    # Parses (and memoizes) the JSON embedded in the PROFILE_RESPONSE script.
    #
    # @return [Hash] the parsed data, or an empty hash when the page contains
    #   no script mentioning PROFILE_RESPONSE
    def php_vars
      @php_vars ||=
        begin
          script = doc.search('script').find { |node| node && node.text['PROFILE_RESPONSE'] }
          if script
            raw = script.text
            # Drop everything up to and including the "summary:" key ...
            raw.remove!(/window\.PROFILE_RESPONSE=.*summary:/)
            # ... and the brace that closed the surrounding object.
            JSON.parse(raw.remove(/\}$/))
          else
            {}
          end
        end
    end

    # Collects the additional profile attributes. Every value is nil (or an
    # empty list for :skills) when the corresponding data is missing.
    #
    # @return [Hash] with keys :country, :hours, :jobs, :rating,
    #   :hourly_rate, :english_level, :hire_again and :skills
    def extra_data
      profile = php_vars['profile'] || {}
      stats = profile['stats'] || {}
      details = profile['profile'] || {}
      location = details['location']
      rate = stats['hourlyRate']
      {
        country: location && "#{location['city']}, #{location['country']}",
        hours: stats['totalHours']&.floor,
        jobs: stats['totalJobsWorked']&.floor,
        rating: stats['rating']&.round(2),
        hourly_rate: rate && "#{rate['amount']} #{rate['currencyCode']}",
        english_level: stats['englishLevel'],
        hire_again: stats['hireAgainPercentage'],
        skills: Array(details['skills']).map { |skill| skill['prettyName'] }
      }
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
module NetworkProfile
  # Extractor for XING profile pages.
  #
  # Values are read from the page markup (+doc+) and from +json_ld+, which
  # is defined outside this class.
  # NOTE(review): json_ld is presumably the page's parsed JSON-LD data as a
  # Hash - confirm against DefaultProfile.
  class XingProfile < DefaultProfile
    self.mdi_icon = 'xing'

    # @param link [String] candidate profile URL
    # @return [String, nil] truthy when the link contains "xing.com/profile/"
    def self.handle?(link)
      link['xing.com/profile/']
    end

    # @return [String, nil] stripped text of the first h1, or nil when the
    #   page has no h1
    def title
      doc.at('h1')&.text&.strip
    end

    # @return [Object, nil] the 'jobTitle' value of json_ld, or nil when
    #   json_ld is nil (extra_data treats json_ld as optional in the same way)
    def text
      json_ld&.dig('jobTitle')
    end

    # Collects the additional profile attributes.
    #
    # @return [Hash] with keys
    #   :employment_status - text of the work-experience node up to the first ", "
    #   :tags      - 'name' of every json_ld 'makesOffer' entry ([] when absent)
    #   :languages - "<h3 text> (<div text>)" per li of the language section
    def extra_data
      {
        employment_status: doc.at('[data-qa=xing-id-work_experience]')&.text&.split(', ')&.first,
        tags: json_ld&.fetch('makesOffer', [])&.map { |i| i['name'] } || [],
        languages: doc.at('[data-qa=language-skills-section]')&.search('li')&.map { |i| "#{i.at('h3').text} (#{i.at('div').text})" },
      }
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
require_relative 'lib/network_profile/version'
|
|
2
|
+
|
|
3
|
+
# Gem specification for network_profile.
Gem::Specification.new do |spec|
  spec.name          = "network_profile"
  spec.version       = NetworkProfile::VERSION
  spec.authors       = ["Stefan Wienert"]
  spec.email         = ["info@stefanwienert.de"]

  spec.summary       = "Extract profile metadata from various social-media-profiles"
  spec.description   = "Extract profile metadata from various social-media-profiles, such as Twitter, XING, Github, Stackoverflow or generic og-metatags."
  spec.homepage      = "https://github.com/pludoni/network_profile"
  spec.license       = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/pludoni/network_profile"
  # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."

  # Package every git-tracked file except those under test/, spec/ or
  # features/. The listing runs from the directory containing this gemspec.
  gem_root = File.expand_path('..', __FILE__)
  spec.files = Dir.chdir(gem_root) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.grep_v(%r{^(test|spec|features)/})
  end
  spec.bindir        = "exe"
  # Executables are the base names of the packaged files under exe/.
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency "typhoeus"
  spec.add_dependency "rdf-microdata"
  spec.add_dependency "activesupport", ">= 5.0.0"
  spec.add_dependency "nokogiri"
end
|
metadata
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: network_profile
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Stefan Wienert
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2020-09-22 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: typhoeus
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rdf-microdata
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: activesupport
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: 5.0.0
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: 5.0.0
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: nokogiri
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
69
|
+
description: Extract profile metadata from various social-media-profiles, such as
|
|
70
|
+
Twitter, XING, Github, Stackoverflow or generic og-metatags.
|
|
71
|
+
email:
|
|
72
|
+
- info@stefanwienert.de
|
|
73
|
+
executables: []
|
|
74
|
+
extensions: []
|
|
75
|
+
extra_rdoc_files: []
|
|
76
|
+
files:
|
|
77
|
+
- ".github/workflows/verify.yml"
|
|
78
|
+
- ".gitignore"
|
|
79
|
+
- ".rspec"
|
|
80
|
+
- Gemfile
|
|
81
|
+
- Gemfile.lock
|
|
82
|
+
- LICENSE.txt
|
|
83
|
+
- README.md
|
|
84
|
+
- Rakefile
|
|
85
|
+
- bin/console
|
|
86
|
+
- bin/rake
|
|
87
|
+
- bin/rspec
|
|
88
|
+
- bin/setup
|
|
89
|
+
- lib/network_profile.rb
|
|
90
|
+
- lib/network_profile/extractor.rb
|
|
91
|
+
- lib/network_profile/extractors/custom.rb
|
|
92
|
+
- lib/network_profile/extractors/default_profile.rb
|
|
93
|
+
- lib/network_profile/extractors/facebook_profile.rb
|
|
94
|
+
- lib/network_profile/extractors/github_graphql.rb
|
|
95
|
+
- lib/network_profile/extractors/github_profile.rb
|
|
96
|
+
- lib/network_profile/extractors/github_project.rb
|
|
97
|
+
- lib/network_profile/extractors/instagram_profile.rb
|
|
98
|
+
- lib/network_profile/extractors/linkedin_profile.rb
|
|
99
|
+
- lib/network_profile/extractors/network_profile_without_extraction.rb
|
|
100
|
+
- lib/network_profile/extractors/researchgate_profile.rb
|
|
101
|
+
- lib/network_profile/extractors/stackoverflow_profile.rb
|
|
102
|
+
- lib/network_profile/extractors/upwork_profile.rb
|
|
103
|
+
- lib/network_profile/extractors/xing_profile.rb
|
|
104
|
+
- lib/network_profile/version.rb
|
|
105
|
+
- network_profile.gemspec
|
|
106
|
+
homepage: https://github.com/pludoni/network_profile
|
|
107
|
+
licenses:
|
|
108
|
+
- MIT
|
|
109
|
+
metadata:
|
|
110
|
+
homepage_uri: https://github.com/pludoni/network_profile
|
|
111
|
+
source_code_uri: https://github.com/pludoni/network_profile
|
|
112
|
+
post_install_message:
|
|
113
|
+
rdoc_options: []
|
|
114
|
+
require_paths:
|
|
115
|
+
- lib
|
|
116
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
117
|
+
requirements:
|
|
118
|
+
- - ">="
|
|
119
|
+
- !ruby/object:Gem::Version
|
|
120
|
+
version: 2.3.0
|
|
121
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
|
+
requirements:
|
|
123
|
+
- - ">="
|
|
124
|
+
- !ruby/object:Gem::Version
|
|
125
|
+
version: '0'
|
|
126
|
+
requirements: []
|
|
127
|
+
rubyforge_project:
|
|
128
|
+
rubygems_version: 2.7.6
|
|
129
|
+
signing_key:
|
|
130
|
+
specification_version: 4
|
|
131
|
+
summary: Extract profile metadata from various social-media-profiles
|
|
132
|
+
test_files: []
|