hawker 0.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 51136fdddf3a7c5b7f533100dea0deb5167569946943d9719b23f6a19f1353f8
4
+ data.tar.gz: 8d7e8615ad452f0f1145c68762ca5c61cbf387ab95255f945e571322c20198e6
5
+ SHA512:
6
+ metadata.gz: fc14643a3acc9ff407475b259d078579388f429b1148505aef69ec6daefc06da36fa2fd9dd6696fd6b11ed9df4c52014f8af0b95958533fd3b19d57ddff9668b
7
+ data.tar.gz: a95638854a9b5b6c9fa4278dcc875c1bedf351b664664f7a27985ca28c30f36b0c704943f1dcfc7cb53d127799f74d0f0e7530a0db680683fcdca7f27f75aa1a
data/lib/hawker.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'hawker/request'
2
+ require 'hawker/drivers/default'
3
+ require 'hawker/drivers/github'
4
+ require 'hawker/drivers/instagram'
5
+ require 'hawker/drivers/twitter'
6
+ require 'hawker/mapper'
7
+
8
# Top-level namespace of the gem and its public entry points.
module Hawker
  class << self
    # Fetches the page behind +url+ and wraps the response in the driver
    # that Hawker::Mapper selects for that URL.
    #
    # @param url [String] address of the page to fetch
    # @return [Object] driver instance built by Hawker::Mapper.map
    def get(url)
      page = Hawker::Request.call(url)
      Hawker::Mapper.map(url, page)
    end

    # Names of the drivers shipped with the gem.
    #
    # @return [Array<String>] a new array on every call
    def drivers
      %w[GitHub Twitter Instagram Default]
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'nokogiri'
2
+
3
module Hawker
  module Drivers
    # Base driver: wraps a response object and exposes helpers for reading
    # values out of its HTML body. Site-specific drivers inherit from it.
    class Default
      # @param response [#body] object whose +body+ holds the page HTML
      def initialize(response)
        @response = response
      end

      # Text of the document's <title> element.
      #
      # @return [String, nil] stripped title, or nil when no title node exists
      def page_title
        title_node = html.at("head title")
        node_text(title_node)
      end

      private

      attr_reader :response

      # Parsed document for the response body; parsed once and memoized.
      #
      # @return [Nokogiri::HTML::Document]
      def html
        @html ||= ::Nokogiri::HTML(response.body)
      end

      # Whitespace-stripped text of +node+.
      #
      # @param node [#text, nil] node to read from
      # @return [String, nil] nil when +node+ is nil
      def node_text(node)
        node.nil? ? nil : node.text.strip
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
module Hawker
  module Drivers
    # Driver that reads user profile details out of a GitHub page using
    # CSS selectors. Text readers return nil and counters return 0 when the
    # selected element is not present in the document.
    class Github < Hawker::Drivers::Default
      # The current user full name
      #
      # @return [String, nil]
      def name
        text_at("h1.vcard-names span.vcard-fullname[itemprop='name']")
      end

      # The current user nickname
      #
      # @return [String, nil]
      def username
        text_at("h1.vcard-names span.vcard-username[itemprop='additionalName']")
      end

      # The current user company
      #
      # @return [String, nil]
      def company
        text_at("li[itemprop='worksFor'] div")
      end

      # The current user location
      #
      # @return [String, nil]
      def location
        text_at("li[itemprop='homeLocation'] span")
      end

      # The current user website, taken from the link's href attribute
      #
      # @return [String, nil]
      def website
        link = html.at("li[itemprop='url'] a")
        link.nil? ? nil : link[:href]
      end

      # The count of repositories owned by the current user
      #
      # @return [Integer]
      def repositories
        counter_for("Repositories")
      end

      # The count of stars that the current user gave
      #
      # @return [Integer]
      def stars
        counter_for("Stars")
      end

      # The count of users following the current user
      #
      # @return [Integer]
      def followers
        counter_for("Followers")
      end

      # The count of users that the current user is following
      #
      # @return [Integer]
      def following
        counter_for("Following")
      end

      private

      # Stripped text of the first node matching +selector+.
      #
      # @param selector [String] CSS selector
      # @return [String, nil] nil when nothing matches
      def text_at(selector)
        node_text(html.at(selector))
      end

      # Integer value of the counter badge inside the link titled +title+.
      # A missing badge yields 0 because nil.to_i is 0.
      #
      # @param title [String] value of the link's title attribute
      # @return [Integer]
      def counter_for(title)
        text_at("a[title='#{title}'] span.Counter").to_i
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ require 'json'
2
+
3
module Hawker
  module Drivers
    # Driver that reads user profile details from the JSON payload that an
    # Instagram page assigns to +window._sharedData+.
    #
    # Every reader returns nil when the payload, or the requested key inside
    # it, is missing, so a page with a different structure does not raise
    # NoMethodError from a nil lookup.
    class Instagram < Hawker::Drivers::Default
      # Captures everything between "window._sharedData =" and the last ";"
      # on the same line.
      SHARED_DATA_PATTERN = /window\._sharedData = ?(.*);/.freeze

      # Location of the user object inside the parsed payload.
      PROFILE_PATH = ["entry_data", "ProfilePage", 0, "graphql", "user"].freeze

      # The current user biography
      #
      # @return [String, nil]
      def biography
        profile["biography"]
      end

      # The current user full name
      #
      # @return [String, nil]
      def full_name
        profile["full_name"]
      end

      # The current user followers count
      #
      # @return [Integer, nil]
      def followers
        profile.dig("edge_followed_by", "count")
      end

      # The number of accounts that the current user follows
      #
      # @return [Integer, nil]
      def following
        profile.dig("edge_follow", "count")
      end

      # The current user external URL that is present in the biography
      #
      # @return [String, nil]
      def external_url
        profile["external_url"]
      end

      # The current user profile picture URL
      #
      # @return [String, nil]
      def profile_pic_url
        profile["profile_pic_url"]
      end

      # The current user Instagram username
      #
      # @return [String, nil]
      def username
        profile["username"]
      end

      private

      # The user object of the first ProfilePage entry; memoized.
      #
      # @return [Hash] empty when the payload has no such entry
      def profile
        @profile ||= json.dig(*PROFILE_PATH) || {}
      end

      # The parsed +window._sharedData+ payload; memoized.
      #
      # @return [Hash] empty when the page contains no such assignment
      # @raise [JSON::ParserError] when the captured text is not valid JSON
      def json
        @json ||= begin
          payload = html.to_s[SHARED_DATA_PATTERN, 1]
          payload.nil? ? {} : JSON.parse(payload)
        end
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
module Hawker
  module Drivers
    # Driver that reads user profile details out of a Twitter page using
    # CSS selectors. Text and attribute readers return nil and counters
    # return 0 when the selected element is not present in the document.
    class Twitter < Hawker::Drivers::Default
      # The current user name
      #
      # @return [String, nil]
      def name
        node_text(html.at("h1.ProfileHeaderCard-name a"))
      end

      # The current user biography note
      #
      # @return [String, nil]
      def bio
        node_text(html.at("p.ProfileHeaderCard-bio"))
      end

      # The current user location
      #
      # @return [String, nil]
      def location
        node_text(html.at("span.ProfileHeaderCard-locationText"))
      end

      # The current user website, taken from the link's title attribute
      #
      # @return [String, nil]
      def website
        title_at("span.ProfileHeaderCard-urlText a")
      end

      # The current user account creation date, taken from the element's
      # title attribute
      #
      # @return [String, nil] nil when the join-date element is absent
      #   (previously this raised NoMethodError on nil)
      def joined
        title_at("span.ProfileHeaderCard-joinDateText")
      end

      # The current user tweets count
      #
      # @return [Integer]
      def tweets
        data_count("a[data-nav='tweets'] span.ProfileNav-value")
      end

      # The current user followers count
      #
      # @return [Integer]
      def followers
        data_count("li.ProfileNav-item--followers span.ProfileNav-value")
      end

      # The count of accounts that the current user follows
      #
      # @return [Integer]
      def following
        data_count("li.ProfileNav-item--following span.ProfileNav-value")
      end

      # The current user likes count
      #
      # @return [Integer]
      def likes
        data_count("li.ProfileNav-item.ProfileNav-item--favorites span.ProfileNav-value")
      end

      # The current user lists count, read from the element text rather
      # than from a data-count attribute
      #
      # @return [Integer]
      def lists
        node_text(html.at("li.ProfileNav-item.ProfileNav-item--lists span.ProfileNav-value")).to_i
      end

      private

      # Value of the title attribute of the first node matching +selector+.
      #
      # @param selector [String] CSS selector
      # @return [String, nil] nil when nothing matches
      def title_at(selector)
        node = html.at(selector)
        node[:title] unless node.nil?
      end

      # Integer value of the data-count attribute of the first node
      # matching +selector+.
      #
      # @param selector [String] CSS selector
      # @return [Integer] 0 when nothing matches
      def data_count(selector)
        node = html.at(selector)
        node.nil? ? 0 : node["data-count"].to_i
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'uri'
2
+
3
module Hawker
  # Chooses the driver class responsible for a URL.
  class Mapper
    # URL pattern => driver class. Patterns are anchored at the start of
    # the URL and the dots in host names are escaped, so only the real
    # hosts (optionally prefixed with "www.") followed by "/" match. A URL
    # that merely embeds one of these addresses, or a look-alike host such
    # as "githubXcom", falls through to the default driver.
    # The "default" entry is the fallback and is never used as a pattern.
    MAPPINGS = {
      %r{\Ahttps?://(www\.)?github\.com/} => Hawker::Drivers::Github,
      %r{\Ahttps?://(www\.)?instagram\.com/} => Hawker::Drivers::Instagram,
      %r{\Ahttps?://(www\.)?twitter\.com/} => Hawker::Drivers::Twitter,
      "default" => Hawker::Drivers::Default
    }.freeze

    # Maps the given URL to a driver class and instantiates it.
    #
    # @param url [String] address the response was fetched from
    # @param response [Object] response handed to the driver's constructor
    # @return [Object] instance of the first driver whose pattern matches
    #   +url+, or of the default driver when none does
    def self.map(url, response)
      _pattern, driver = MAPPINGS.find do |pattern, _driver|
        # Skip the "default" String key: it is a lookup key, not a pattern.
        pattern.is_a?(Regexp) && url.match(pattern)
      end

      (driver || MAPPINGS["default"]).new(response)
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'rest-client'
2
+
3
module Hawker
  # Thin wrapper around RestClient used to download a page.
  class Request
    # Issues a GET request for +url+, sending a fixed user agent header,
    # in order to get the page html.
    #
    # @param url [String] address of the page to fetch
    # @return [RestClient::Response]
    def self.call(url)
      headers = { user_agent: 'Mozilla/5.0 Mac OS' }
      ::RestClient.get(url, headers)
    end
  end
end
@@ -0,0 +1,12 @@
1
module Hawker
  # Holds the gem's release number.
  module Version
    # Gem current version
    #
    # @return [String] the version string
    def to_s
      "0.0.0.1"
    end

    # Expose to_s on the module itself (Hawker::Version.to_s) and keep the
    # instance-level copy private.
    module_function :to_s
  end
end
data/spec/helper.rb ADDED
@@ -0,0 +1,5 @@
1
# Builds a stubbed response whose body is the HTML fixture of a driver.
#
# The fixture is looked up as fixtures/<driver>.html relative to the
# directory containing this file. `double` is not defined here; it is
# expected to be available in the calling context (presumably RSpec mocks).
#
# @param driver [String, Symbol] base name of the fixture file
# @return [Object] test double named 'response' that responds to #body
# @raise [Errno::ENOENT] when the fixture file does not exist
def load_driver_fixture(driver)
  fixture_path = File.expand_path("../fixtures/#{driver}.html", __FILE__)

  # File.read opens, reads and closes the file in a single call; the former
  # File.open(...).read left the handle open until garbage collection.
  double('response', body: File.read(fixture_path))
end
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hawker
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Paweł Dąbrowski
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-04-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rest-client
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.2
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '2.0'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.2
33
+ - !ruby/object:Gem::Dependency
34
+ name: nokogiri
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.8'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.8.2
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '1.8'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.8.2
53
+ - !ruby/object:Gem::Dependency
54
+ name: rspec
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '3.7'
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 3.7.0
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '3.7'
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 3.7.0
73
+ description: Scrap the most popular websites without the knowledge about the page
74
+ structure or API access
75
+ email: dziamber@gmail.com
76
+ executables: []
77
+ extensions: []
78
+ extra_rdoc_files: []
79
+ files:
80
+ - lib/hawker.rb
81
+ - lib/hawker/drivers/default.rb
82
+ - lib/hawker/drivers/github.rb
83
+ - lib/hawker/drivers/instagram.rb
84
+ - lib/hawker/drivers/twitter.rb
85
+ - lib/hawker/mapper.rb
86
+ - lib/hawker/request.rb
87
+ - lib/hawker/version.rb
88
+ - spec/helper.rb
89
+ homepage: http://github.com/rubyhero/hawker
90
+ licenses:
91
+ - MIT
92
+ metadata: {}
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 2.7.3
110
+ signing_key:
111
+ specification_version: 4
112
+ summary: Scrap the most popular websites without the knowledge about the page structure
113
+ or API access
114
+ test_files: []