hawker 0.0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 51136fdddf3a7c5b7f533100dea0deb5167569946943d9719b23f6a19f1353f8
4
+ data.tar.gz: 8d7e8615ad452f0f1145c68762ca5c61cbf387ab95255f945e571322c20198e6
5
+ SHA512:
6
+ metadata.gz: fc14643a3acc9ff407475b259d078579388f429b1148505aef69ec6daefc06da36fa2fd9dd6696fd6b11ed9df4c52014f8af0b95958533fd3b19d57ddff9668b
7
+ data.tar.gz: a95638854a9b5b6c9fa4278dcc875c1bedf351b664664f7a27985ca28c30f36b0c704943f1dcfc7cb53d127799f74d0f0e7530a0db680683fcdca7f27f75aa1a
data/lib/hawker.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'hawker/request'
2
+ require 'hawker/drivers/default'
3
+ require 'hawker/drivers/github'
4
+ require 'hawker/drivers/instagram'
5
+ require 'hawker/drivers/twitter'
6
+ require 'hawker/mapper'
7
+
8
module Hawker
  class << self
    # Downloads the page behind +url+ and hands the response to the mapper,
    # which wraps it in the driver chosen for that URL.
    #
    # @param url [String] address of the page to fetch
    # @return [Object] whatever Hawker::Mapper.map returns for the URL/response pair
    def get(url)
      Hawker::Mapper.map(url, Hawker::Request.call(url))
    end

    # Names of the drivers shipped with the gem.
    #
    # @return [Array<String>] a new array on every call
    def drivers
      %w[GitHub Twitter Instagram Default]
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'nokogiri'
2
+
3
module Hawker
  module Drivers
    # Generic driver that exposes data available on any HTML page.
    class Default
      # @param response [#body] object whose +body+ holds the page markup
      def initialize(response)
        @response = response
      end

      # Text of the +title+ element located under +head+.
      #
      # @return [String, nil] the stripped title, or nil when the element is not found
      def page_title
        title_node = html.at("head title")
        node_text(title_node)
      end

      private

      attr_reader :response

      # Document parsed from the response body; built on first use and cached.
      def html
        @html = ::Nokogiri::HTML(response.body) unless @html
        @html
      end

      # Stripped text content of +node+; nil when +node+ is nil.
      def node_text(node)
        node.text.strip unless node.nil?
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
module Hawker
  module Drivers
    # Driver reading profile data out of a GitHub user page.
    class Github < Hawker::Drivers::Default
      # Full name of the user.
      #
      # @return [String, nil] nil when the element is not on the page
      def name
        text_at("h1.vcard-names span.vcard-fullname[itemprop='name']")
      end

      # Nickname (login) of the user.
      #
      # @return [String, nil] nil when the element is not on the page
      def username
        text_at("h1.vcard-names span.vcard-username[itemprop='additionalName']")
      end

      # Company the user lists on the profile.
      #
      # @return [String, nil] nil when the element is not on the page
      def company
        text_at("li[itemprop='worksFor'] div")
      end

      # Location the user lists on the profile.
      #
      # @return [String, nil] nil when the element is not on the page
      def location
        text_at("li[itemprop='homeLocation'] span")
      end

      # Website linked from the profile (the link's +href+ attribute).
      #
      # @return [String, nil] nil when the link is not on the page
      def website
        link = html.at("li[itemprop='url'] a")
        link.nil? ? nil : link[:href]
      end

      # Number of repositories owned by the user.
      #
      # @return [Integer] 0 when the counter is not on the page
      def repositories
        counter_at("a[title='Repositories'] span.Counter")
      end

      # Number of stars the user gave.
      #
      # @return [Integer] 0 when the counter is not on the page
      def stars
        counter_at("a[title='Stars'] span.Counter")
      end

      # Number of users following the user.
      #
      # @return [Integer] 0 when the counter is not on the page
      def followers
        counter_at("a[title='Followers'] span.Counter")
      end

      # Number of users the user is following.
      #
      # @return [Integer] 0 when the counter is not on the page
      def following
        counter_at("a[title='Following'] span.Counter")
      end

      private

      # Stripped text of the first node matching +selector+, or nil.
      def text_at(selector)
        node_text(html.at(selector))
      end

      # Text of the first node matching +selector+ converted with +to_i+
      # (nil.to_i is 0, so a missing node yields 0).
      def counter_at(selector)
        text_at(selector).to_i
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ require 'json'
2
+
3
module Hawker
  module Drivers
    # Driver reading profile data out of the JSON payload that an Instagram
    # profile page assigns to +window._sharedData+.
    #
    # Every reader returns nil (or 0 for counters) when the payload or the
    # requested key is missing, instead of raising on a nil lookup.
    class Instagram < Hawker::Drivers::Default
      # Captures everything between "window._sharedData =" and the last ";"
      # on the same line. Whitespace around "=" is optional.
      SHARED_DATA_PATTERN = /window\._sharedData\s*=\s*(.*);/

      # Key path from the payload root down to the profile's user hash.
      USER_PATH = ["entry_data", "ProfilePage", 0, "graphql", "user"].freeze

      # The current user biography
      #
      # @return [String, nil]
      def biography
        user["biography"]
      end

      # The current user full name
      #
      # @return [String, nil]
      def full_name
        user["full_name"]
      end

      # The current user followers count
      #
      # @return [Integer] 0 when the count is not present
      def followers
        user.dig("edge_followed_by", "count").to_i
      end

      # The number of accounts that the current user follows
      #
      # @return [Integer] 0 when the count is not present
      def following
        user.dig("edge_follow", "count").to_i
      end

      # The current user external URL that is present in the biography
      #
      # @return [String, nil]
      def external_url
        user["external_url"]
      end

      # The current user profile picture URL
      #
      # @return [String, nil]
      def profile_pic_url
        user["profile_pic_url"]
      end

      # The current user Instagram username
      #
      # @return [String, nil]
      def username
        user["username"]
      end

      private

      # The user hash from the payload; an empty hash when any step of the
      # path is missing, so readers can index it without nil checks.
      #
      # @return [Hash]
      def user
        @user ||= json.dig(*USER_PATH) || {}
      end

      # The parsed +window._sharedData+ payload; an empty hash when the page
      # does not contain the assignment. Malformed JSON still raises
      # JSON::ParserError so broken payloads are not silently hidden.
      #
      # @return [Hash]
      def json
        @json ||= begin
          payload = html.to_s[SHARED_DATA_PATTERN, 1]
          payload.nil? ? {} : JSON.parse(payload)
        end
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
module Hawker
  module Drivers
    # Driver reading profile data out of a Twitter profile page.
    class Twitter < Hawker::Drivers::Default
      # The current user name
      #
      # @return [String, nil] nil when the element is not on the page
      def name
        node_text(html.at("h1.ProfileHeaderCard-name a"))
      end

      # The current user biography note
      #
      # @return [String, nil] nil when the element is not on the page
      def bio
        node_text(html.at("p.ProfileHeaderCard-bio"))
      end

      # The current user location
      #
      # @return [String, nil] nil when the element is not on the page
      def location
        node_text(html.at("span.ProfileHeaderCard-locationText"))
      end

      # The current user website (the +title+ attribute of the profile link)
      #
      # @return [String, nil] nil when the link is not on the page
      def website
        attribute_at("span.ProfileHeaderCard-urlText a", :title)
      end

      # The current user account creation date (the +title+ attribute of the
      # join-date element)
      #
      # @return [String, nil] nil when the element is not on the page
      def joined
        # Guarded like #website: a missing node yields nil instead of a
        # NoMethodError on nil.
        attribute_at("span.ProfileHeaderCard-joinDateText", :title)
      end

      # The current user tweets count
      #
      # @return [Integer] 0 when the counter is not on the page
      def tweets
        data_count("a[data-nav='tweets'] span.ProfileNav-value")
      end

      # The current user followers count
      #
      # @return [Integer] 0 when the counter is not on the page
      def followers
        data_count("li.ProfileNav-item--followers span.ProfileNav-value")
      end

      # The count of accounts that the current user follows
      #
      # @return [Integer] 0 when the counter is not on the page
      def following
        data_count("li.ProfileNav-item--following span.ProfileNav-value")
      end

      # The current user likes count
      #
      # @return [Integer] 0 when the counter is not on the page
      def likes
        data_count("li.ProfileNav-item.ProfileNav-item--favorites span.ProfileNav-value")
      end

      # The current user lists count, read from the element text rather than
      # from the +data-count+ attribute
      #
      # @return [Integer] 0 when the counter is not on the page
      def lists
        node_text(html.at("li.ProfileNav-item.ProfileNav-item--lists span.ProfileNav-value")).to_i
      end

      private

      # Value of +attribute+ on the first node matching +selector+;
      # nil when no node matches.
      def attribute_at(selector, attribute)
        node = html.at(selector)
        node[attribute] unless node.nil?
      end

      # The +data-count+ attribute of the first node matching +selector+,
      # converted with +to_i+ (a missing node or attribute yields 0).
      def data_count(selector)
        attribute_at(selector, "data-count").to_i
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'uri'
2
+
3
module Hawker
  # Chooses the driver class responsible for a URL.
  class Mapper
    # URL pattern => driver class. The "default" entry is the fallback and is
    # never used as a pattern.
    #
    # Patterns are anchored at the start of the URL and escape the host dots,
    # so a social URL that merely appears inside another URL (for example in
    # a query string) or a look-alike host such as "githubXcom" does not
    # select a driver. Scheme and host are matched case-insensitively.
    MAPPINGS = {
      %r{\Ahttps?://(?:www\.)?github\.com/}i => Hawker::Drivers::Github,
      %r{\Ahttps?://(?:www\.)?instagram\.com/}i => Hawker::Drivers::Instagram,
      %r{\Ahttps?://(?:www\.)?twitter\.com/}i => Hawker::Drivers::Twitter,
      "default" => Hawker::Drivers::Default
    }.freeze

    # Maps the given URL to a driver class and instantiates it.
    #
    # @param url [String] address the response was fetched from
    # @param response [Object] response object handed to the driver constructor
    # @return [Object] instance of the first driver whose pattern matches the
    #   URL, or of the default driver when none matches
    def self.map(url, response)
      target = url.to_s
      # Skip the String "default" key: only Regexp keys are URL patterns.
      _pattern, driver = MAPPINGS.find do |pattern, _driver|
        pattern.is_a?(Regexp) && pattern =~ target
      end

      (driver || MAPPINGS["default"]).new(response)
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'rest-client'
2
+
3
module Hawker
  class Request
    class << self
      # Issues a GET request for +url+ through RestClient, passing a fixed
      # +user_agent+ value as the request headers, and returns the result.
      #
      # @param url [String] address of the page to download
      # @return [RestClient::Response]
      def call(url)
        headers = { user_agent: 'Mozilla/5.0 Mac OS' }
        ::RestClient.get(url, headers)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module Hawker
  module Version
    # Version string of the gem.
    #
    # @return [String]
    def to_s
      "0.0.0.1"
    end

    # Expose +to_s+ on the module itself and keep the instance copy private.
    module_function :to_s
  end
end
data/spec/helper.rb ADDED
@@ -0,0 +1,5 @@
1
# Builds a test double standing in for an HTTP response whose +body+ is the
# HTML fixture stored at fixtures/<driver>.html next to this file.
#
# @param driver [String, Symbol] fixture base name (without the .html extension)
# @return [Object] the object returned by +double+, responding to +body+
# @raise [Errno::ENOENT] when the fixture file does not exist
def load_driver_fixture(driver)
  file_path = File.expand_path("../fixtures/#{driver}.html", __FILE__)

  # File.read opens, reads and closes the file, so no handle is left open.
  double('response', body: File.read(file_path))
end
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hawker
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Paweł Dąbrowski
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-04-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rest-client
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.2
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '2.0'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.2
33
+ - !ruby/object:Gem::Dependency
34
+ name: nokogiri
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.8'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.8.2
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '1.8'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.8.2
53
+ - !ruby/object:Gem::Dependency
54
+ name: rspec
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '3.7'
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 3.7.0
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '3.7'
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 3.7.0
73
+ description: Scrape the most popular websites without the knowledge about the page
74
+ structure or API access
75
+ email: dziamber@gmail.com
76
+ executables: []
77
+ extensions: []
78
+ extra_rdoc_files: []
79
+ files:
80
+ - lib/hawker.rb
81
+ - lib/hawker/drivers/default.rb
82
+ - lib/hawker/drivers/github.rb
83
+ - lib/hawker/drivers/instagram.rb
84
+ - lib/hawker/drivers/twitter.rb
85
+ - lib/hawker/mapper.rb
86
+ - lib/hawker/request.rb
87
+ - lib/hawker/version.rb
88
+ - spec/helper.rb
89
+ homepage: http://github.com/rubyhero/hawker
90
+ licenses:
91
+ - MIT
92
+ metadata: {}
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 2.7.3
110
+ signing_key:
111
+ specification_version: 4
112
+ summary: Scrape the most popular websites without the knowledge about the page structure
113
+ or API access
114
+ test_files: []