hawker 0.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/hawker.rb +29 -0
- data/lib/hawker/drivers/default.rb +32 -0
- data/lib/hawker/drivers/github.rb +71 -0
- data/lib/hawker/drivers/instagram.rb +66 -0
- data/lib/hawker/drivers/twitter.rb +82 -0
- data/lib/hawker/mapper.rb +22 -0
- data/lib/hawker/request.rb +13 -0
- data/lib/hawker/version.rb +12 -0
- data/spec/helper.rb +5 -0
- metadata +114 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 51136fdddf3a7c5b7f533100dea0deb5167569946943d9719b23f6a19f1353f8
|
4
|
+
data.tar.gz: 8d7e8615ad452f0f1145c68762ca5c61cbf387ab95255f945e571322c20198e6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fc14643a3acc9ff407475b259d078579388f429b1148505aef69ec6daefc06da36fa2fd9dd6696fd6b11ed9df4c52014f8af0b95958533fd3b19d57ddff9668b
|
7
|
+
data.tar.gz: a95638854a9b5b6c9fa4278dcc875c1bedf351b664664f7a27985ca28c30f36b0c704943f1dcfc7cb53d127799f74d0f0e7530a0db680683fcdca7f27f75aa1a
|
data/lib/hawker.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'hawker/request'
|
2
|
+
require 'hawker/drivers/default'
|
3
|
+
require 'hawker/drivers/github'
|
4
|
+
require 'hawker/drivers/instagram'
|
5
|
+
require 'hawker/drivers/twitter'
|
6
|
+
require 'hawker/mapper'
|
7
|
+
|
8
|
+
module Hawker
  class << self
    # Fetches the page at +url+ and hands the URL together with the response
    # to Hawker::Mapper, returning whatever the mapper builds.
    #
    # @param url [String] address of the page to fetch
    # @return [Object] the value returned by Hawker::Mapper.map
    def get(url)
      page = Hawker::Request.call(url)

      Hawker::Mapper.map(url, page)
    end

    # Names of the available drivers.
    #
    # @return [Array<String>] a new array on every call
    def drivers
      %w[GitHub Twitter Instagram Default]
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Hawker
  module Drivers
    # Generic driver: wraps a fetched response and exposes data that can be
    # read from any HTML page.
    class Default
      # @param response [#body] response object whose body holds the page markup
      def initialize(response)
        @response = response
      end

      # Text of the page's "head title" node with surrounding whitespace removed.
      #
      # @return [String, nil] nil when the page has no such node
      def page_title
        title_node = html.at("head title")
        node_text(title_node)
      end

      private

      attr_reader :response

      # Parsed document, built from the response body on first use and memoized.
      def html
        @html ||= ::Nokogiri::HTML(response.body)
      end

      # Stripped text of +node+, or nil when +node+ is nil.
      def node_text(node)
        node.text.strip unless node.nil?
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Hawker
  module Drivers
    # Driver for GitHub profile pages. Each reader looks up one node in the
    # parsed page with a CSS selector.
    class Github < Hawker::Drivers::Default
      # Full name shown on the profile.
      #
      # @return [String, nil] nil when the node is not on the page
      def name
        node_text(html.at("h1.vcard-names span.vcard-fullname[itemprop='name']"))
      end

      # Nickname (login) shown on the profile.
      #
      # @return [String, nil] nil when the node is not on the page
      def username
        node_text(html.at("h1.vcard-names span.vcard-username[itemprop='additionalName']"))
      end

      # Company listed on the profile.
      #
      # @return [String, nil] nil when the node is not on the page
      def company
        node_text(html.at("li[itemprop='worksFor'] div"))
      end

      # Location listed on the profile.
      #
      # @return [String, nil] nil when the node is not on the page
      def location
        node_text(html.at("li[itemprop='homeLocation'] span"))
      end

      # Website linked from the profile (the link's href attribute).
      #
      # @return [String, nil] nil when the link is not on the page
      def website
        link = html.at("li[itemprop='url'] a")
        link && link[:href]
      end

      # Count of repositories owned by the user.
      #
      # @return [Integer] 0 when the counter is not on the page
      def repositories
        counter("Repositories")
      end

      # Count of stars the user gave.
      #
      # @return [Integer] 0 when the counter is not on the page
      def stars
        counter("Stars")
      end

      # Count of users following the user.
      #
      # @return [Integer] 0 when the counter is not on the page
      def followers
        counter("Followers")
      end

      # Count of users the user is following.
      #
      # @return [Integer] 0 when the counter is not on the page
      def following
        counter("Following")
      end

      private

      # Integer value of the Counter badge inside the link titled +title+.
      # NOTE(review): String#to_i stops at the first non-digit character, so
      # abbreviated or comma-separated text would be truncated; confirm what
      # the page actually renders for large counts.
      def counter(title)
        node_text(html.at("a[title='#{title}'] span.Counter")).to_i
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Hawker
  module Drivers
    # Driver for Instagram profile pages. All data comes from the JSON object
    # the page assigns to +window._sharedData+; the user record is read from
    # entry_data -> ProfilePage -> first element -> graphql -> user.
    #
    # Readers return nil (or 0 for counts) when the page carries no such data,
    # instead of raising NoMethodError part-way through the lookup. A payload
    # that is present but is not valid JSON still raises JSON::ParserError.
    class Instagram < Hawker::Drivers::Default
      # Captures the object literal assigned to window._sharedData, up to the
      # last ";" on that line.
      SHARED_DATA_PATTERN = /window\._sharedData\s*=\s*(.*);/
      private_constant :SHARED_DATA_PATTERN

      # The current user biography
      #
      # @return [String, nil]
      def biography
        user["biography"]
      end

      # The current user full name
      #
      # @return [String, nil]
      def full_name
        user["full_name"]
      end

      # The current user followers count
      #
      # @return [Integer] 0 when the count is not available
      def followers
        walk(user, "edge_followed_by", "count").to_i
      end

      # The number of accounts that the current user follows
      #
      # @return [Integer] 0 when the count is not available
      def following
        walk(user, "edge_follow", "count").to_i
      end

      # The current user external URL that is present in the biography
      #
      # @return [String, nil]
      def external_url
        user["external_url"]
      end

      # The current user profile picture URL
      #
      # @return [String, nil]
      def profile_pic_url
        user["profile_pic_url"]
      end

      # The current user Instagram username
      #
      # @return [String, nil]
      def username
        user["username"]
      end

      private

      # The "user" hash of the first ProfilePage entry, or an empty hash when
      # any level of that path is missing. Memoized.
      def user
        @user ||= begin
          found = walk(json, "entry_data", "ProfilePage", 0, "graphql", "user")
          found.is_a?(Hash) ? found : {}
        end
      end

      # Parsed window._sharedData payload, or an empty hash when the page does
      # not contain one (or it is not a JSON object). Memoized.
      def json
        @json ||= begin
          payload = html.to_s[SHARED_DATA_PATTERN, 1]
          parsed = payload.nil? ? nil : JSON.parse(payload)
          parsed.is_a?(Hash) ? parsed : {}
        end
      end

      # Follows +keys+ through nested hashes (any key) and arrays (Integer
      # keys only), yielding nil as soon as a level is missing or has an
      # unexpected type.
      def walk(data, *keys)
        keys.reduce(data) do |node, key|
          case node
          when Hash then node[key]
          when Array then key.is_a?(Integer) ? node[key] : nil
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Hawker
  module Drivers
    # Driver for Twitter profile pages. Each reader looks up one node in the
    # parsed page with a CSS selector and returns nil (or 0 for counts) when
    # that node is not on the page.
    class Twitter < Hawker::Drivers::Default
      # The current user name
      #
      # @return [String, nil]
      def name
        node_text(html.at("h1.ProfileHeaderCard-name a"))
      end

      # The current user biography note
      #
      # @return [String, nil]
      def bio
        node_text(html.at("p.ProfileHeaderCard-bio"))
      end

      # The current user location
      #
      # @return [String, nil]
      def location
        node_text(html.at("span.ProfileHeaderCard-locationText"))
      end

      # The current user website (the link's title attribute)
      #
      # @return [String, nil]
      def website
        title_of("span.ProfileHeaderCard-urlText a")
      end

      # The current user account creation date (the node's title attribute)
      #
      # @return [String, nil] nil when the join-date node is missing; this
      #   used to raise NoMethodError
      def joined
        title_of("span.ProfileHeaderCard-joinDateText")
      end

      # The current user tweets count
      #
      # @return [Integer]
      def tweets
        data_count("a[data-nav='tweets'] span.ProfileNav-value")
      end

      # The current user followers count
      #
      # @return [Integer]
      def followers
        data_count("li.ProfileNav-item--followers span.ProfileNav-value")
      end

      # The count of accounts that the current user follows
      #
      # @return [Integer]
      def following
        data_count("li.ProfileNav-item--following span.ProfileNav-value")
      end

      # The current user likes count
      #
      # @return [Integer]
      def likes
        data_count("li.ProfileNav-item.ProfileNav-item--favorites span.ProfileNav-value")
      end

      # The current user lists count
      #
      # Unlike the other counters this one is read from the node text rather
      # than from its data-count attribute; that behavior is kept as it was.
      #
      # @return [Integer] 0 when the node is not on the page
      def lists
        node_text(html.at("li.ProfileNav-item.ProfileNav-item--lists span.ProfileNav-value")).to_i
      end

      private

      # Title attribute of the first node matching +selector+, or nil when
      # nothing matches.
      def title_of(selector)
        node = html.at(selector)
        node[:title] unless node.nil?
      end

      # Integer value of the data-count attribute of the first node matching
      # +selector+; 0 when nothing matches.
      def data_count(selector)
        node = html.at(selector)
        node.nil? ? 0 : node["data-count"].to_i
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Hawker
  # Chooses the driver class that understands a given URL.
  class Mapper
    # URL pattern => driver class. Patterns are anchored at the start of the
    # URL and escape the dots in the host name, so a site driver is chosen
    # only when the URL's own host is that site (optionally prefixed with
    # "www.") and is followed by a "/". Scheme and host are matched
    # case-insensitively. The "default" entry is the fallback driver and is
    # never matched against URLs.
    MAPPINGS = {
      %r{\Ahttps?://(?:www\.)?github\.com/}i => Hawker::Drivers::Github,
      %r{\Ahttps?://(?:www\.)?instagram\.com/}i => Hawker::Drivers::Instagram,
      %r{\Ahttps?://(?:www\.)?twitter\.com/}i => Hawker::Drivers::Twitter,
      "default" => Hawker::Drivers::Default
    }.freeze

    # Maps the given URL to a driver class and instantiates it.
    #
    # @param url [String] the URL that was requested
    # @param response [Object] the response to hand to the driver
    # @return [Object] an instance of the first driver whose pattern matches
    #   +url+, or of the "default" driver when none does
    def self.map(url, response)
      _pattern, driver = MAPPINGS.find do |pattern, _driver|
        # Skip the "default" string key: only real patterns take part in matching.
        pattern.is_a?(Regexp) && pattern =~ url
      end
      driver ||= MAPPINGS["default"]

      driver.new(response)
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rest-client'
|
2
|
+
|
3
|
+
module Hawker
  class Request
    class << self
      # Issues a GET request for +url+ through RestClient, sending a fixed
      # User-Agent header, and returns RestClient's result.
      #
      # @param url [String] address of the page to fetch
      # @return [RestClient::Response]
      def call(url)
        headers = { user_agent: 'Mozilla/5.0 Mac OS' }
        ::RestClient.get(url, headers)
      end
    end
  end
end
|
data/spec/helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hawker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Paweł Dąbrowski
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-04-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rest-client
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.2
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '2.0'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.0.2
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: nokogiri
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '1.8'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.8.2
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '1.8'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.8.2
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: rspec
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '3.7'
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 3.7.0
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '3.7'
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 3.7.0
|
73
|
+
description: Scrap the most popular websites without the knowledge about the page
|
74
|
+
structure or API access
|
75
|
+
email: dziamber@gmail.com
|
76
|
+
executables: []
|
77
|
+
extensions: []
|
78
|
+
extra_rdoc_files: []
|
79
|
+
files:
|
80
|
+
- lib/hawker.rb
|
81
|
+
- lib/hawker/drivers/default.rb
|
82
|
+
- lib/hawker/drivers/github.rb
|
83
|
+
- lib/hawker/drivers/instagram.rb
|
84
|
+
- lib/hawker/drivers/twitter.rb
|
85
|
+
- lib/hawker/mapper.rb
|
86
|
+
- lib/hawker/request.rb
|
87
|
+
- lib/hawker/version.rb
|
88
|
+
- spec/helper.rb
|
89
|
+
homepage: http://github.com/rubyhero/hawker
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
metadata: {}
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
requirements: []
|
108
|
+
rubyforge_project:
|
109
|
+
rubygems_version: 2.7.3
|
110
|
+
signing_key:
|
111
|
+
specification_version: 4
|
112
|
+
summary: Scrap the most popular websites without the knowledge about the page structure
|
113
|
+
or API access
|
114
|
+
test_files: []
|