google_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 76ced4f6c307e04166d79f3cdff8eef98cbf76cf3668d8a1945edd0b9185c1c9
4
+ data.tar.gz: 231973730cea75a1be4698cbce1e0f537f4c56f698892e522c96cefd9536d0bd
5
+ SHA512:
6
+ metadata.gz: adec3d65b94f63cdd17bf3197d84a31361a215549c0774dfee58aa8b053bab3a760d2d73defee81ada1273c56460b4957aae7589eeb1610505d7cc39decfcd49
7
+ data.tar.gz: a65ecb950d44c8bfc1510a7bc0aa8e3838709b038a7248c85d323bef5747e97a9e21d7af3e987a019b0fdb491040c8fbeb6c2f593f120309d5a937e5ce097973
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # GoogleParser
2
+
3
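+ GoogleParser parses the raw HTML of a Google search results page with Nokogiri and extracts the organic results: position, title, description, breadcrumbs, URL, domain and root domain.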
4
+
5
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/google_parser.rb`. To experiment with that code, run `bin/console` for an interactive prompt.
6
+
7
+ ## Installation
8
+
9
+ TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_PRIOR_TO_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
10
+
11
+ Install the gem and add to the application's Gemfile by executing:
12
+
13
+ $ bundle add google_parser
14
+
15
+ If bundler is not being used to manage dependencies, install the gem by executing:
16
+
17
+ $ gem install google_parser
18
+
19
+ ## Usage
20
+
21
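+ After `require "google_parser"`, pass the raw HTML of a Google results page to `GoogleParser::GoogleParser.new(html)` and read the parsed result hashes from `#organic_results`.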
22
+
23
+ ## Development
24
+
25
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
26
+
27
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
28
+
29
+ ## Contributing
30
+
31
+ Bug reports and pull requests are welcome on GitHub at https://github.com/kjellberg/google_parser.
32
+
33
+ ## License
34
+
35
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module GoogleParser # Namespace of the google_parser gem; this file only carries its version.
4
+ VERSION = "0.1.0" # Gem version string; the README says to update it here before running `bundle exec rake release`.
5
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "google_parser/version"
4
+
5
+ require "nokogiri"
6
+
7
module GoogleParser
  # Raised when no selector set is registered for a page's `jsmodel` value.
  class InvalidJsModelError < StandardError; end

  # Parses the raw HTML of a Google results page into a list of organic
  # results (one Hash per result).
  class GoogleParser
    # CSS selectors for each supported page layout, keyed by the value of the
    # `jsmodel` attribute found on the page's <body> element.
    SELECTORS = {
      "hspDDf" => {
        organic_results: {
          container: "div.Gx5Zad.fP1Qef.xpd.EtOod.pkphOe",
          title: "h3 > div",
          description: "div.BNeawe.s3v9rd.AP7Wnd",
          breadcrumbs: "div.BNeawe.UPmit.AP7Wnd.lRVwie",
          url: "a[href]"
        }
      }
    }.freeze

    # Matches a Google redirect href and captures the `q` value up to the next
    # `&` or, when `q` is the only parameter, up to the end of the string.
    GOOGLE_REDIRECT_PATTERN = %r{\A/url\?q=([^&]*)}.freeze

    attr_reader :raw_html, :doc, :organic_results, :jsmodel

    # Parses the page immediately; results are available from
    # #organic_results afterwards.
    #
    # @param raw_html [String] HTML source of the results page
    # @raise [RuntimeError] "Could not parse HTML" when any StandardError is
    #   raised while reading the page or its results. The underlying error
    #   (for example InvalidJsModelError) is available through `#cause`.
    def initialize(raw_html)
      @raw_html = raw_html
      @doc = Nokogiri::HTML(raw_html)
      @organic_results = []
      # A <body> without the attribute yields nil here, which #selectors then
      # reports as an unknown jsmodel instead of failing with NoMethodError.
      @jsmodel = @doc.css("body").attr("jsmodel")&.value

      parse_organic_results
    rescue StandardError
      # Ruby attaches the rescued exception as `cause` of the new one.
      raise "Could not parse HTML"
    end

    # Selector set for this page's jsmodel, memoized after the first lookup.
    #
    # @return [Hash] selectors grouped by section (currently :organic_results)
    # @raise [InvalidJsModelError] when the jsmodel has no entry in SELECTORS
    def selectors
      @selectors ||= SELECTORS.fetch(@jsmodel) do
        raise InvalidJsModelError, "Could not find selectors for jsmodel: \"#{@jsmodel}\""
      end
    end

    private

    # Builds @organic_results: one Hash per result container, numbered from 1
    # in document order.
    #
    # @return [Array<Hash>] the parsed results
    def parse_organic_results
      fields = selectors.fetch(:organic_results)

      @organic_results = @doc.css(fields[:container]).map.with_index(1) do |result_element, position|
        url = parse_google_url(result_element.css(fields[:url]))
        domain = extract_domain(url)

        {
          position: position,
          title: result_element.css(fields[:title]).text.strip,
          description: result_element.css(fields[:description]).text.strip,
          breadcrumbs: result_element.css(fields[:breadcrumbs]).text.strip,
          url: url,
          domain: domain,
          root_domain: strip_www(domain)
        }
      end
    end

    # @param url [String] absolute or relative URL
    # @return [String, nil] host part of the URL; nil when the URL has none
    #   (e.g. a relative link)
    def extract_domain(url)
      URI.parse(url).host
    end

    # Drops a leading "www." label from a host name.
    #
    # @param domain [String, nil]
    # @return [String, nil] nil when no domain was given
    def strip_www(domain)
      # Only the prefix is removed, so hosts that merely contain "www."
      # (such as "awww.example.com") are left intact.
      domain&.delete_prefix("www.")
    end

    # Extracts the target URL from a result's link.
    #
    # @param full_google_uri [#attr] node set of the result's anchor element(s)
    # @return [String] the `q` value of a "/url?q=..." redirect href, or the
    #   href itself when it is not such a redirect
    def parse_google_url(full_google_uri)
      href = full_google_uri.attr("href").value
      # NOTE(review): the q value is returned exactly as it appears in the
      # href, without percent-decoding - confirm whether callers expect a
      # decoded URL.
      href[GOOGLE_REDIRECT_PATTERN, 1] || href
    end
  end
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Rasmus Kjellberg
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-08-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.15.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.15.3
27
+ description: Write a longer description or delete this line.
28
+ email:
29
+ - 2277443+kjellberg@users.noreply.github.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - README.md
35
+ - lib/google_parser.rb
36
+ - lib/google_parser/version.rb
37
+ homepage: https://rankzon.com
38
+ licenses:
39
+ - MIT
40
+ metadata:
41
+ homepage_uri: https://rankzon.com
42
+ source_code_uri: https://github.com/kjellberg/google_parser
43
+ changelog_uri: https://github.com/kjellberg/google_parser/blob/master/CHANGELOG.md
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 2.6.0
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubygems_version: 3.4.10
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Write a short summary, because RubyGems requires one.
63
+ test_files: []