open_graph_fetcher 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 46670fff3dd827c35482bd63b1c246aa876fd85f82b3fdff752144e4f07e537b
4
+ data.tar.gz: a260be55faeb2038d4aeacfa9d419a1b7301ec023d327eb51fc2d5a1992ff260
5
+ SHA512:
6
+ metadata.gz: f23ee76f381d04b9fcc1bb60eb2f1baf4a438647009987a072f7a23334008a18614f11bb9a9cf74065e5deb695f65a3d8bed786ea2e6f32137c61a4c14a4ae06
7
+ data.tar.gz: 005b86f83da1f16ce55998bcefc1bceb93c0e0137bccab08e72ad1da2e749077fe2a0f5e85cd0bbf6abc005181cb16c2abab01adc1e2fa623344146743d198a7
@@ -0,0 +1,22 @@
1
+ name: CI
2
+ on:
3
+ push:
4
+ branches: [ master ]
5
+ pull_request:
6
+ branches: [ master ]
7
+ jobs:
8
+ test:
9
+ runs-on: ubuntu-latest
10
+ strategy:
11
+ fail-fast: false
12
+ matrix:
13
+ ruby-version: ['3.2', '3.3']
14
+ steps:
15
+ - uses: actions/checkout@v3
16
+ - name: Set up Ruby
17
+ uses: ruby/setup-ruby@v1
18
+ with:
19
+ ruby-version: ${{ matrix.ruby-version }}
20
+ bundler-cache: true
21
+ - name: Run tests
22
+ run: bundle exec rspec
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright 2024 Marco Colli (collimarco.com)
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # OpenGraphFetcher
2
+
3
+ Fetch Open Graph metadata in a safer way.
4
+
5
+ - Includes some mitigations for SSRF attacks
6
+ - Blocks private and local IP ranges
7
+ - Avoids TOCTOU (time-of-check to time-of-use) races by connecting to the same IP address that was validated
8
+ - Supports only HTTPS on the standard port (443)
9
+ - Includes request timeouts
10
+ - Avoids redirects
11
+ - Allows only text/html responses
12
+ - Returns only known OG properties and nothing else
13
+
14
+ ## Installation
15
+
16
+ ```ruby
17
+ gem 'open_graph_fetcher'
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ Basic usage:
23
+
24
+ ```ruby
25
+ url = "https://ogp.me"
26
+ fetcher = OpenGraphFetcher::Fetcher.new(url)
27
+ og_data = fetcher.fetch
28
+ puts og_data
29
+ ```
30
+
31
+ ## License
32
+
33
+ The gem is available as open source under the terms of the MIT License.
@@ -0,0 +1,11 @@
1
module OpenGraphFetcher
  # Root of the gem's exception hierarchy; rescue this to catch every
  # error class defined below.
  class Error < StandardError
  end

  # The URL scheme is not "https".
  class InvalidSchemeError < Error
  end

  # The URL specifies a port other than 443.
  class InvalidPortError < Error
  end

  # The URL's host could not be resolved to an IP address.
  class IPResolutionError < Error
  end

  # The resolved IP address is in a private or reserved range.
  class PrivateIPError < Error
  end

  # The HTTP request timed out or otherwise failed before a response
  # was obtained.
  class FetchError < Error
  end

  # The server answered with a status other than 200.
  class ResponseError < Error
  end

  # The response is not HTML.
  class InvalidContentTypeError < Error
  end
end
@@ -0,0 +1,77 @@
1
+ require 'nokogiri'
2
+ require 'net/http'
3
+ require 'resolv'
4
+ require 'ipaddr'
5
+
6
module OpenGraphFetcher
  # Downloads a page over HTTPS and extracts a fixed set of Open Graph
  # properties from it.
  #
  # The URL is treated as untrusted input: only HTTPS on port 443 is
  # accepted, the host is resolved once and the resulting address is both
  # validated and used for the connection, and only HTML responses with
  # status 200 are parsed.
  class Fetcher
    # Open Graph properties (without the "og:" prefix) that are returned.
    OG_PROPERTIES = %w[title type image url description].freeze

    # Timeouts, in seconds, passed to Net::HTTP.
    OPEN_TIMEOUT = 3
    READ_TIMEOUT = 3

    # Non-public ranges that IPAddr#private?, #link_local? and #loopback?
    # do not cover: "this network", carrier-grade NAT, IETF protocol
    # assignments, benchmarking, multicast, reserved/broadcast, and the
    # IPv6 unspecified and multicast ranges.
    RESERVED_RANGES = %w[
      0.0.0.0/8
      100.64.0.0/10
      192.0.0.0/24
      198.18.0.0/15
      224.0.0.0/4
      240.0.0.0/4
      ::/128
      ff00::/8
    ].map { |cidr| IPAddr.new(cidr) }.freeze

    # Convenience wrapper around +new(url).fetch+.
    #
    # @param url [String] the page URL
    # @return [Hash{String => String}] see {#fetch}
    def self.fetch(url)
      new(url).fetch
    end

    # @param url [String] the page URL (validated when #fetch is called)
    def initialize(url)
      @url = url
    end

    # Validates the URL, fetches the page and returns its Open Graph data.
    #
    # @return [Hash{String => String}] the OG_PROPERTIES found in the page,
    #   keyed by property name without the "og:" prefix
    # @raise [InvalidSchemeError] if the URL is not HTTPS
    # @raise [InvalidPortError] if the URL port is not 443
    # @raise [IPResolutionError] if the host is missing or cannot be resolved
    # @raise [PrivateIPError] if the host resolves to a non-public address
    # @raise [FetchError] if the request times out or fails
    # @raise [ResponseError] if the response status is not 200
    # @raise [InvalidContentTypeError] if the response is not text/html
    def fetch
      uri = URI.parse(@url)
      raise InvalidSchemeError, "Only HTTPS URLs are allowed" unless uri.scheme == "https"
      raise InvalidPortError, "Only the default HTTPS port (443) is allowed" if uri.port && uri.port != 443

      ip_address = resolve_ip(uri)
      raise PrivateIPError, "Resolved IP address is in a private or reserved range" if private_ip?(ip_address)

      response = fetch_data(uri, ip_address)
      raise ResponseError, "HTTP response is not ok: HTTP #{response.code} #{response.message}" unless response.code == "200"
      raise InvalidContentTypeError, "Only HTML content is allowed" unless html_content?(response)

      parse_open_graph_data(response.body)
    end

    private

    # Resolves the URL's host to a single IP address string.
    #
    # Uses +hostname+ (not +host+) so that a bracketed IPv6 literal is
    # unwrapped, matching what #fetch_data passes to Net::HTTP.
    def resolve_ip(uri)
      host = uri.hostname
      raise IPResolutionError, "Could not resolve IP: URL has no host" if host.nil? || host.empty?

      Resolv.getaddress(host)
    rescue Resolv::ResolvError => e
      raise IPResolutionError, "Could not resolve IP: #{e.message}"
    end

    # Whether +ip+ must not be contacted: private, link-local, loopback or
    # inside one of RESERVED_RANGES.
    #
    # IPv4-mapped/compatible IPv6 addresses (e.g. "::ffff:10.0.0.1") are
    # converted to their IPv4 form first so they cannot bypass the IPv4
    # checks. An address that cannot be parsed is treated as blocked
    # (fail closed).
    def private_ip?(ip)
      address = IPAddr.new(ip).native
      return true if address.private? || address.link_local? || address.loopback?

      RESERVED_RANGES.any? { |range| range.include?(address) }
    rescue IPAddr::Error
      true
    end

    # Performs the GET request against the already-validated +ip+ while
    # still using the URL's hostname for TLS and the Host header.
    def fetch_data(uri, ip)
      request = Net::HTTP::Get.new(uri.request_uri)
      Net::HTTP.start(
        uri.hostname,
        uri.port,
        ipaddr: ip,
        use_ssl: true,
        open_timeout: OPEN_TIMEOUT,
        read_timeout: READ_TIMEOUT
      ) do |http|
        http.request(request)
      end
    rescue Net::OpenTimeout, Net::ReadTimeout => e
      raise FetchError, "Request timed out: #{e.message}"
    rescue StandardError => e
      raise FetchError, "Failed to fetch data: #{e.message}"
    end

    # Whether the response's media type is exactly text/html.
    #
    # Parameters such as "; charset=utf-8" are ignored and the comparison
    # is case-insensitive, since media types are not case-sensitive.
    def html_content?(response)
      media_type = response["Content-Type"].to_s.split(";", 2).first.to_s.strip
      media_type.casecmp?("text/html") ? true : false
    end

    # Extracts the OG_PROPERTIES present in +html+; properties whose meta
    # tag or content attribute is missing are omitted from the result.
    def parse_open_graph_data(html)
      doc = Nokogiri::HTML(html)

      OG_PROPERTIES.each_with_object({}) do |property, og_data|
        meta_tag = doc.at_css("meta[property='og:#{property}']")
        content = meta_tag && meta_tag[:content]
        og_data[property] = content if content
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module OpenGraphFetcher
  # Version of the gem; the gemspec reads this constant.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,7 @@
1
+ require 'open_graph_fetcher/version'
2
+ require 'open_graph_fetcher/errors'
3
+ require 'open_graph_fetcher/fetcher'
4
+
5
# Top-level namespace of the gem. It is intentionally empty here: the
# version, error classes and fetcher are defined in the files required above.
module OpenGraphFetcher
end
@@ -0,0 +1,14 @@
1
+ require_relative 'lib/open_graph_fetcher/version'
2
+
3
# Packaging metadata for the open_graph_fetcher gem.
Gem::Specification.new do |spec|
  spec.name     = 'open_graph_fetcher'
  spec.version  = OpenGraphFetcher::VERSION
  spec.summary  = 'Fetch Open Graph metadata in a safer way.'
  spec.author   = 'Marco Colli'
  spec.homepage = 'https://github.com/collimarco/open_graph_fetcher'
  spec.license  = 'MIT'

  # Ship every file tracked by git.
  spec.files = `git ls-files`.split("\n")

  # Runtime dependency (HTML parsing).
  spec.add_dependency 'nokogiri'

  # Test-only dependencies.
  %w[rspec webmock].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
@@ -0,0 +1,101 @@
1
+ require 'open_graph_fetcher'
2
+ require 'webmock/rspec'
3
+
4
RSpec.describe OpenGraphFetcher::Fetcher do
  # By default every host resolves to a fixed, non-private address so that
  # no example performs a real DNS lookup; individual examples override
  # this stub when they need a different resolution result.
  before do
    allow(Resolv).to receive(:getaddress).and_return("203.0.113.0")
  end

  describe ".fetch" do
    context "when fetching Open Graph data from a valid HTTPS URL" do
      it "successfully retrieves and parses Open Graph properties" do
        body = <<~HTML
          <html>
            <head>
              <meta property="og:title" content="Example Title">
              <meta property="og:type" content="website">
              <meta property="og:image" content="https://example.com/image.jpg">
              <meta property="og:url" content="https://example.com/example-page">
              <meta property="og:description" content="Example description">
            </head>
          </html>
        HTML

        stub_request(:get, "https://example.com/example-page").to_return(
          status: 200,
          body: body,
          headers: { "Content-Type" => "text/html" }
        )

        og_data = described_class.fetch("https://example.com/example-page")

        expect(og_data).to eq(
          "title" => "Example Title",
          "type" => "website",
          "image" => "https://example.com/image.jpg",
          "url" => "https://example.com/example-page",
          "description" => "Example description"
        )
      end
    end

    context "when given an HTTP URL" do
      it "raises an InvalidSchemeError" do
        expect { described_class.fetch("http://example.com") }
          .to raise_error(OpenGraphFetcher::InvalidSchemeError)
      end
    end

    context "when given a URL with a non-standard port" do
      it "raises an InvalidPortError" do
        expect { described_class.fetch("https://example.com:8443") }
          .to raise_error(OpenGraphFetcher::InvalidPortError)
      end
    end

    context "when the IP address cannot be resolved" do
      it "raises an IPResolutionError with an appropriate error message" do
        allow(Resolv).to receive(:getaddress).and_raise(Resolv::ResolvError, "DNS resolution failed")

        expect { described_class.fetch("https://nonexistent-domain.com") }
          .to raise_error(OpenGraphFetcher::IPResolutionError, /Could not resolve IP: DNS resolution failed/)
      end
    end

    context "when the resolved IP address is private" do
      it "raises a PrivateIPError" do
        allow(Resolv).to receive(:getaddress).with("10.0.0.1").and_return("10.0.0.1")

        expect { described_class.fetch("https://10.0.0.1") }
          .to raise_error(OpenGraphFetcher::PrivateIPError)
      end
    end

    context "when the HTTP response code is not 200" do
      # A non-200 status is reported as ResponseError (not FetchError, which
      # is reserved for requests that fail before a response is obtained).
      it "raises a ResponseError with the response code and message" do
        stub_request(:get, "https://example.com/nonexistent-page").to_return(
          status: 404,
          body: "Not Found"
        )

        expect { described_class.fetch("https://example.com/nonexistent-page") }
          .to raise_error(OpenGraphFetcher::ResponseError, /HTTP response is not ok: HTTP 404/)
      end
    end

    context "when a network timeout occurs" do
      it "raises a FetchError with a timeout message" do
        stub_request(:get, "https://example.com").to_timeout

        expect { described_class.fetch("https://example.com") }
          .to raise_error(OpenGraphFetcher::FetchError, /Request timed out/)
      end
    end

    context "when the URL returns a non-HTML content type" do
      it "raises an InvalidContentTypeError" do
        stub_request(:get, "https://example.com/non-html").to_return(
          status: 200,
          headers: { "Content-Type" => "application/json" }
        )

        expect { described_class.fetch("https://example.com/non-html") }
          .to raise_error(OpenGraphFetcher::InvalidContentTypeError)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: open_graph_fetcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Marco Colli
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-11-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: webmock
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files: []
60
+ files:
61
+ - ".github/workflows/ci.yml"
62
+ - ".gitignore"
63
+ - Gemfile
64
+ - LICENSE
65
+ - README.md
66
+ - lib/open_graph_fetcher.rb
67
+ - lib/open_graph_fetcher/errors.rb
68
+ - lib/open_graph_fetcher/fetcher.rb
69
+ - lib/open_graph_fetcher/version.rb
70
+ - open_graph_fetcher.gemspec
71
+ - spec/fetcher_spec.rb
72
+ homepage: https://github.com/collimarco/open_graph_fetcher
73
+ licenses:
74
+ - MIT
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubygems_version: 3.5.16
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: Fetch Open Graph metadata in a safer way.
95
+ test_files: []