brand_logo 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +32 -0
- data/LICENSE.txt +21 -0
- data/README.md +107 -0
- data/lib/brand_logo/config.rb +60 -0
- data/lib/brand_logo/errors.rb +10 -0
- data/lib/brand_logo/fetcher.rb +109 -0
- data/lib/brand_logo/html_parser.rb +63 -0
- data/lib/brand_logo/http_client.rb +59 -0
- data/lib/brand_logo/icon.rb +34 -0
- data/lib/brand_logo/image_analyzer.rb +36 -0
- data/lib/brand_logo/logging.rb +29 -0
- data/lib/brand_logo/strategies/base_strategy.rb +117 -0
- data/lib/brand_logo/strategies/duckduckgo_strategy.rb +45 -0
- data/lib/brand_logo/strategies/manifest_strategy.rb +137 -0
- data/lib/brand_logo/strategies/meta_tag_strategy.rb +95 -0
- data/lib/brand_logo/strategies/scraping/default_favicon_checker.rb +33 -0
- data/lib/brand_logo/strategies/scraping/dimensions_extractor.rb +42 -0
- data/lib/brand_logo/strategies/scraping/format_extractor.rb +56 -0
- data/lib/brand_logo/strategies/scraping/icon_finder.rb +79 -0
- data/lib/brand_logo/strategies/scraping/url_normalizer.rb +50 -0
- data/lib/brand_logo/strategies/scraping_strategy.rb +77 -0
- data/lib/brand_logo/version.rb +6 -0
- data/lib/brand_logo.rb +37 -0
- metadata +138 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: da46bcee230e23bbe43d0a20b21c13ef2eab0dd2faebcf8abfcf51ccb4d54fe0
|
|
4
|
+
data.tar.gz: 7c0888790d4919fa0e8d7cbbbfccdd3f5cbebc457f4206c64f34950f51cf423e
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: e589a8a91d7c41349997f0ed68a4f6f9ea3d6f53f2a7f98b796bf6491b3bc91b7301390db0074e200dcdfec21152bc88230d48605d0ec17e9bf7deb55eab4f68
|
|
7
|
+
data.tar.gz: b0b0763a1ec0a9ee78840997b9b759b0ff13fc94729db0426c24ddd3acb79f52d92c5f37c0d878fb047035f21915136da14b78026e4e2535eead61a73f471f73
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented here.
|
|
4
|
+
Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|
5
|
+
|
|
6
|
+
## [1.0.0] - 2026-04-15
|
|
7
|
+
|
|
8
|
+
First public release under the name `brand_logo`.
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- `BrandLogo::Fetcher` entry point — accepts `config:` and `strategies:` keyword
|
|
13
|
+
arguments.
|
|
14
|
+
- `fetch_all(domain)` on `Fetcher` — returns every icon found across all
|
|
15
|
+
strategies, deduplicated by URL.
|
|
16
|
+
- **Four strategies**, tried in order:
|
|
17
|
+
- `ScrapingStrategy` — parses HTML `<link rel="icon">` tags, retries with
|
|
18
|
+
`https://www.` prefix and `http://` fallback.
|
|
19
|
+
- `MetaTagStrategy` — reads `og:image`, `twitter:image` meta tags.
|
|
20
|
+
- `ManifestStrategy` — parses PWA `manifest.json` `icons[]` entries.
|
|
21
|
+
- `DuckduckgoStrategy` — last-resort DuckDuckGo icon cache fallback.
|
|
22
|
+
- `BrandLogo::Config` — centralises `min_dimensions`, `max_dimensions`,
|
|
23
|
+
`allow_svg`, `timeout`, `max_hops`.
|
|
24
|
+
- `BrandLogo::Logging` — configurable via any stdlib `Logger`.
|
|
25
|
+
- Typed error hierarchy: `FetchError`, `NoIconFoundError`, `ValidationError`,
|
|
26
|
+
`ParseError`.
|
|
27
|
+
- Domain validation in `Fetcher#fetch` — raises `ValidationError` for invalid input.
|
|
28
|
+
- Configurable HTTP timeout (default 10 s).
|
|
29
|
+
- Dependency injection for `HttpClient`, `HtmlParser`, `ImageAnalyzer` — strategies are
|
|
30
|
+
fully testable without network calls.
|
|
31
|
+
- Sorbet `typed: strict` everywhere.
|
|
32
|
+
- RSpec test suite with **100% line coverage** (SimpleCov).
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AdVitam (https://www.advitam.fr)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# brand_logo
|
|
2
|
+
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/brand_logo.svg)](https://badge.fury.io/rb/brand_logo)
|
|
4
|
+
[![CI](https://github.com/AdVitam/brand_logo/actions/workflows/main.yml/badge.svg)](https://github.com/AdVitam/brand_logo/actions/workflows/main.yml)
|
|
5
|
+
|
|
6
|
+
Fetch the best logo or icon for any website from its domain.
|
|
7
|
+
|
|
8
|
+
`brand_logo` chains several strategies (favicon tags, Open Graph / Twitter meta
|
|
9
|
+
images, PWA web app manifests, DuckDuckGo fallback) and returns the best icon
|
|
10
|
+
based on format and dimensions.
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
```ruby
|
|
15
|
+
gem 'brand_logo'
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Usage
|
|
19
|
+
|
|
20
|
+
```ruby
|
|
21
|
+
require 'brand_logo'
|
|
22
|
+
|
|
23
|
+
fetcher = BrandLogo::Fetcher.new
|
|
24
|
+
icon = fetcher.fetch('github.com')
|
|
25
|
+
|
|
26
|
+
icon.url # => "https://github.com/favicon.svg"
|
|
27
|
+
icon.format # => "svg"
|
|
28
|
+
icon.dimensions # => { width: nil, height: nil }
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### All icons
|
|
32
|
+
|
|
33
|
+
```ruby
|
|
34
|
+
icons = fetcher.fetch_all('github.com')
|
|
35
|
+
# Every icon found across all strategies, deduplicated by URL
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Configuration
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
config = BrandLogo::Config.new(
|
|
42
|
+
min_dimensions: { width: 32, height: 32 }, # ignore tiny icons
|
|
43
|
+
max_dimensions: { width: 512, height: 512 }, # ignore oversized images
|
|
44
|
+
allow_svg: true, # prefer SVG when available
|
|
45
|
+
timeout: 10, # HTTP timeout in seconds
|
|
46
|
+
max_hops: 5 # max redirects to follow
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
fetcher = BrandLogo::Fetcher.new(config: config)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Custom strategy chain
|
|
53
|
+
|
|
54
|
+
Strategies are tried in order until one succeeds:
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
fetcher = BrandLogo::Fetcher.new(
|
|
58
|
+
strategies: [
|
|
59
|
+
BrandLogo::Strategies::ScrapingStrategy.new(config: config, ...),
|
|
60
|
+
BrandLogo::Strategies::DuckduckgoStrategy.new(config: config, ...)
|
|
61
|
+
]
|
|
62
|
+
)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Default chain: `ScrapingStrategy → MetaTagStrategy → ManifestStrategy → DuckduckgoStrategy`
|
|
66
|
+
|
|
67
|
+
### Logging
|
|
68
|
+
|
|
69
|
+
```ruby
|
|
70
|
+
require 'logger'
|
|
71
|
+
BrandLogo::Logging.logger.level = Logger::DEBUG # verbose output
|
|
72
|
+
BrandLogo::Logging.logger = MyCustomLogger.new # inject your own logger
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Strategies
|
|
76
|
+
|
|
77
|
+
| Strategy | Source | Notes |
|
|
78
|
+
|---|---|---|
|
|
79
|
+
| `ScrapingStrategy` | HTML `<link rel="icon">` tags | Primary — tries `https://`, `https://www.`, `http://` |
|
|
80
|
+
| `MetaTagStrategy` | `og:image`, `twitter:image` | High-res images, filter via `max_dimensions` |
|
|
81
|
+
| `ManifestStrategy` | PWA `manifest.json` `icons[]` | Best for progressive web apps |
|
|
82
|
+
| `DuckduckgoStrategy` | DuckDuckGo icon cache | Last-resort fallback |
|
|
83
|
+
|
|
84
|
+
## Error handling
|
|
85
|
+
|
|
86
|
+
```ruby
|
|
87
|
+
begin
|
|
88
|
+
icon = fetcher.fetch('example.com')
|
|
89
|
+
rescue BrandLogo::NoIconFoundError
|
|
90
|
+
# no icon found by any strategy
|
|
91
|
+
rescue BrandLogo::ValidationError => e
|
|
92
|
+
# invalid domain format: e.message
|
|
93
|
+
end
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Requirements
|
|
97
|
+
|
|
98
|
+
- Ruby >= 3.2
|
|
99
|
+
|
|
100
|
+
## Contributing
|
|
101
|
+
|
|
102
|
+
Bug reports and pull requests are welcome on GitHub at
|
|
103
|
+
<https://github.com/AdVitam/brand_logo>.
|
|
104
|
+
|
|
105
|
+
## License
|
|
106
|
+
|
|
107
|
+
Released under the MIT License. See [LICENSE.txt](LICENSE.txt).
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# typed: strict
|
|
3
|
+
|
|
4
|
+
require 'sorbet-runtime'
|
|
5
|
+
|
|
6
|
+
module BrandLogo
|
|
7
|
+
# Immutable configuration object for the gem.
|
|
8
|
+
# Centralizes all runtime parameters, replacing the 4 primitive kwargs
|
|
9
|
+
# previously duplicated across every strategy constructor.
|
|
10
|
+
#
|
|
11
|
+
# Usage:
|
|
12
|
+
# config = BrandLogo::Config.new(min_dimensions: { width: 32, height: 32 }, timeout: 5)
|
|
13
|
+
class Config
  extend T::Sig

  # Default HTTP timeout and redirect limit used when none is supplied.
  DEFAULT_TIMEOUT = T.let(10, Integer)
  DEFAULT_MAX_HOPS = T.let(5, Integer)
  # A 0x0 minimum means no icon is rejected for being too small.
  DEFAULT_MIN_DIMENSIONS = T.let({ width: 0, height: 0 }.freeze, T::Hash[Symbol, Integer])
  # NOTE(review): the two constants below are not referenced inside this class;
  # presumably they are consumed by the strategies — confirm against callers.
  DEFAULT_DIMENSIONS = T.let({ width: 16, height: 16 }.freeze, T::Hash[Symbol, Integer])
  DEFAULT_FAVICON_PATH = T.let('/favicon.ico', String)

  # Minimum accepted icon size (`:width` / `:height`). Frozen.
  sig { returns(T::Hash[Symbol, Integer]) }
  attr_reader :min_dimensions

  # Maximum accepted icon size, or nil when no upper bound applies. Frozen.
  sig { returns(T.nilable(T::Hash[Symbol, Integer])) }
  attr_reader :max_dimensions

  # Whether SVG icons are accepted.
  sig { returns(T::Boolean) }
  attr_reader :allow_svg

  # Timeout handed to the HTTP client for connect, read and write.
  sig { returns(Integer) }
  attr_reader :timeout

  # Maximum number of HTTP redirects to follow.
  sig { returns(Integer) }
  attr_reader :max_hops

  # Builds a configuration; every argument is optional.
  #
  # The dimension hashes are copied and frozen so that later mutation of the
  # caller's hash (or of the value returned by a reader) cannot silently
  # change a configuration that is documented as immutable.
  sig do
    params(
      min_dimensions: T::Hash[Symbol, Integer],
      max_dimensions: T.nilable(T::Hash[Symbol, Integer]),
      allow_svg: T::Boolean,
      timeout: Integer,
      max_hops: Integer
    ).void
  end
  def initialize(
    min_dimensions: DEFAULT_MIN_DIMENSIONS,
    max_dimensions: nil,
    allow_svg: true,
    timeout: DEFAULT_TIMEOUT,
    max_hops: DEFAULT_MAX_HOPS
  )
    @min_dimensions = T.let(min_dimensions.dup.freeze, T::Hash[Symbol, Integer])
    @max_dimensions = T.let(max_dimensions&.dup&.freeze, T.nilable(T::Hash[Symbol, Integer]))
    @allow_svg = T.let(allow_svg, T::Boolean)
    @timeout = T.let(timeout, Integer)
    @max_hops = T.let(max_hops, Integer)
  end
end
|
|
60
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# typed: strict
|
|
3
|
+
|
|
4
|
+
require 'sorbet-runtime'
|
|
5
|
+
|
|
6
|
+
module BrandLogo
|
|
7
|
+
# Entry point for brand_logo retrieval.
|
|
8
|
+
#
|
|
9
|
+
# Composes a chain of strategies tried in order until one finds a valid icon.
|
|
10
|
+
# Dependencies (HTTP client, HTML parser, image analyzer) are instantiated once
|
|
11
|
+
# and shared across all strategies.
|
|
12
|
+
#
|
|
13
|
+
# Usage:
|
|
14
|
+
# # Default configuration
|
|
15
|
+
# icon = BrandLogo::Fetcher.new.fetch('github.com')
|
|
16
|
+
#
|
|
17
|
+
# # Custom config
|
|
18
|
+
# config = BrandLogo::Config.new(min_dimensions: { width: 32, height: 32 }, timeout: 5)
|
|
19
|
+
# icon = BrandLogo::Fetcher.new(config: config).fetch('github.com')
|
|
20
|
+
#
|
|
21
|
+
# # Custom strategy chain (OCP)
|
|
22
|
+
# fetcher = BrandLogo::Fetcher.new(strategies: [MyCustomStrategy.new(config: config)])
|
|
23
|
+
#
|
|
24
|
+
# # All icons from all strategies
|
|
25
|
+
# icons = BrandLogo::Fetcher.new.fetch_all('github.com')
|
|
26
|
+
class Fetcher
  extend T::Sig

  # Hostname made of dot-separated labels followed by an alphabetic TLD.
  # Each label must start and end with a letter or digit and be at most
  # 63 characters long, so inputs such as "..com", "-.com", ".example.com"
  # or "foo-.com" are rejected instead of being sent to the strategies.
  DOMAIN_PATTERN = T.let(
    /\A(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}\z/i,
    Regexp
  )

  # config:     runtime settings; a default Config is built when nil.
  # strategies: custom strategy chain; the default chain is built when nil.
  sig do
    params(
      config: T.nilable(Config),
      strategies: T.nilable(T::Array[Strategies::BaseStrategy])
    ).void
  end
  def initialize(config: nil, strategies: nil)
    @config = T.let(config || Config.new, Config)
    @http_client = T.let(RealHttpClient.new(@config), HttpClient)
    @image_analyzer = T.let(FastimageAnalyzer.new, ImageAnalyzer)
    @html_parser = T.let(NokogiriParser.new, HtmlParser)
    @strategies = T.let(strategies || build_default_strategies, T::Array[Strategies::BaseStrategy])
  end

  # Returns the icon produced by the first strategy that finds one.
  # Raises ValidationError for a malformed domain and NoIconFoundError
  # when every strategy comes back empty.
  sig { params(domain: String).returns(Icon) }
  def fetch(domain)
    validate_domain!(domain)
    BrandLogo::Logging.logger.debug("Fetching brand_logo for: #{domain}")

    @strategies.each do |strategy|
      BrandLogo::Logging.logger.debug("Trying #{strategy.class.name}")
      icon = strategy.fetch(domain)
      return icon if icon
    end

    raise NoIconFoundError, "No brand_logo found for #{domain}"
  end

  # Returns every icon reported by every strategy, keeping the first
  # occurrence of each URL. Raises ValidationError for a malformed domain.
  sig { params(domain: String).returns(T::Array[Icon]) }
  def fetch_all(domain)
    validate_domain!(domain)

    @strategies
      .flat_map { |strategy| strategy.fetch_all(domain) }
      .uniq(&:url)
  end

  private

  # Raises ValidationError unless the domain matches DOMAIN_PATTERN.
  sig { params(domain: String).void }
  def validate_domain!(domain)
    return if domain.match?(DOMAIN_PATTERN)

    raise ValidationError, "Invalid domain: #{domain.inspect}"
  end

  # Default chain, in priority order; all strategies share the same
  # HTTP client, HTML parser and image analyzer instances.
  sig { returns(T::Array[Strategies::BaseStrategy]) }
  def build_default_strategies
    [
      Strategies::ScrapingStrategy.new(
        config: @config,
        http_client: @http_client,
        html_parser: @html_parser,
        image_analyzer: @image_analyzer
      ),
      Strategies::MetaTagStrategy.new(
        config: @config,
        http_client: @http_client,
        html_parser: @html_parser,
        image_analyzer: @image_analyzer
      ),
      Strategies::ManifestStrategy.new(
        config: @config,
        http_client: @http_client,
        html_parser: @html_parser,
        image_analyzer: @image_analyzer
      ),
      Strategies::DuckduckgoStrategy.new(
        config: @config,
        http_client: @http_client,
        image_analyzer: @image_analyzer
      )
    ]
  end
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# typed: strict
|
|
3
|
+
|
|
4
|
+
require 'sorbet-runtime'
|
|
5
|
+
require 'nokogiri'
|
|
6
|
+
|
|
7
|
+
module BrandLogo
|
|
8
|
+
# Value Object wrapping a parsed HTML document.
|
|
9
|
+
# Exposes only the subset of the Nokogiri API that strategies need,
|
|
10
|
+
# preventing strategies from depending on Nokogiri directly.
|
|
11
|
+
class ParsedDocument
  extend T::Sig

  # Wraps an already-parsed Nokogiri HTML document.
  sig { params(doc: Nokogiri::HTML::Document).void }
  def initialize(doc)
    @doc = T.let(doc, Nokogiri::HTML::Document)
  end

  # All nodes matching the CSS selector, as a plain Array.
  sig { params(selector: String).returns(T::Array[T.untyped]) }
  def css(selector)
    matches = @doc.css(selector)
    matches.to_a
  end

  # First node matching the selector, or nil when nothing matches.
  sig { params(selector: String).returns(T.untyped) }
  def at(selector)
    @doc.at(selector)
  end

  # The `href` attribute of the document's <base> element, or nil when
  # the element is absent.
  sig { returns(T.nilable(String)) }
  def base_href
    base_node = @doc.at('base')
    base_node ? base_node['href'] : nil
  end
end
|
|
40
|
+
|
|
41
|
+
# Interface for parsing raw HTML into a ParsedDocument.
|
|
42
|
+
# Strategies receive this via dependency injection.
|
|
43
|
+
module HtmlParser
  extend T::Sig
  extend T::Helpers

  interface!

  # Turns a raw HTML string into a ParsedDocument.
  # Implementations must override this method.
  sig { abstract.params(html: String).returns(ParsedDocument) }
  def parse(html)
  end
end
|
|
52
|
+
|
|
53
|
+
# Concrete implementation using the Nokogiri gem.
|
|
54
|
+
class NokogiriParser
  extend T::Sig
  include HtmlParser

  # Parses the HTML with Nokogiri and wraps the resulting document.
  sig { override.params(html: String).returns(ParsedDocument) }
  def parse(html)
    document = Nokogiri::HTML(html)
    ParsedDocument.new(document)
  end
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# typed: strict
|
|
3
|
+
|
|
4
|
+
require 'sorbet-runtime'
|
|
5
|
+
require 'http'
|
|
6
|
+
|
|
7
|
+
module BrandLogo
|
|
8
|
+
# Interface for HTTP operations used by strategies.
|
|
9
|
+
# Decouples strategies from the HTTP gem, enabling injection of test doubles.
|
|
10
|
+
module HttpClient
  extend T::Sig
  extend T::Helpers

  interface!

  # Contract: the response body of +url+ as a String, or nil on any
  # failure (network error, non-2xx status, timeout).
  sig { abstract.params(url: String).returns(T.nilable(String)) }
  def get_body(url)
  end

  # Contract: true when a HEAD request to +url+ answers with a 2xx
  # status, false on any failure.
  sig { abstract.params(url: String).returns(T::Boolean) }
  def head_success?(url)
  end
end
|
|
24
|
+
|
|
25
|
+
# Concrete HTTP client using the `http` gem.
|
|
26
|
+
# Centralizes timeout and redirect configuration previously hardcoded
|
|
27
|
+
# across ScrapingStrategy, DuckduckgoStrategy, and UrlNormalizer.
|
|
28
|
+
class RealHttpClient
  extend T::Sig
  include HttpClient

  # Copies the timeout and redirect limit out of the configuration.
  sig { params(config: Config).void }
  def initialize(config)
    @timeout = T.let(config.timeout, Integer)
    @max_hops = T.let(config.max_hops, Integer)
  end

  # GETs the URL, following up to @max_hops redirects.
  # Returns the body for a 2xx response and nil otherwise; any exception
  # raised while performing the request is deliberately turned into nil.
  sig { override.params(url: String).returns(T.nilable(String)) }
  def get_body(url)
    response = HTTP
               .timeout(connect: @timeout, read: @timeout, write: @timeout)
               .follow(max_hops: @max_hops)
               .get(url)
    response.status.success? ? response.body.to_s : nil
  rescue StandardError
    nil
  end

  # Sends a HEAD request and reports whether the final response is 2xx.
  # Redirects are followed with the same limit as get_body, so a resource
  # that is merely redirected (for example http -> https) is not reported
  # as missing. Any exception is deliberately turned into false.
  sig { override.params(url: String).returns(T::Boolean) }
  def head_success?(url)
    response = HTTP
               .timeout(connect: @timeout, read: @timeout, write: @timeout)
               .follow(max_hops: @max_hops)
               .head(url)
    response.status.success?
  rescue StandardError
    false
  end
end
|
|
59
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# typed: strict
|
|
3
|
+
|
|
4
|
+
require 'sorbet-runtime'
|
|
5
|
+
|
|
6
|
+
module BrandLogo
|
|
7
|
+
# Represents a brand_logo icon with its URL, dimensions and format
|
|
8
|
+
# Used to store and return brand_logo information across the gem
|
|
9
|
+
class Icon
  extend T::Sig

  # Location of the icon.
  sig { returns(String) }
  attr_reader :url

  # `:width` / `:height` of the icon; either value may be nil.
  sig { returns(T::Hash[Symbol, T.nilable(Integer)]) }
  attr_reader :dimensions

  # Image format name, e.g. 'svg'.
  sig { returns(String) }
  attr_reader :format

  # Builds an icon from its three attributes; all are required.
  sig do
    params(
      url: String,
      dimensions: T::Hash[Symbol, T.nilable(Integer)],
      format: String
    ).void
  end
  def initialize(url:, dimensions:, format:)
    @url = T.let(url, String)
    @dimensions = T.let(dimensions, T::Hash[Symbol, T.nilable(Integer)])
    @format = T.let(format, String)
  end
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# typed: strict
|
|
3
|
+
|
|
4
|
+
require 'sorbet-runtime'
|
|
5
|
+
require 'fastimage'
|
|
6
|
+
|
|
7
|
+
module BrandLogo
|
|
8
|
+
# Interface for extracting image dimensions from a URL.
|
|
9
|
+
# Implementations are injected into strategies, enabling testing without network calls.
|
|
10
|
+
module ImageAnalyzer
  extend T::Sig
  extend T::Helpers

  interface!

  # Contract: a hash with `:width` and `:height` for the image at +url+;
  # either value may be nil.
  sig { abstract.params(url: String).returns(T::Hash[Symbol, T.nilable(Integer)]) }
  def dimensions(url)
  end
end
|
|
19
|
+
|
|
20
|
+
# Concrete implementation using the FastImage gem.
|
|
21
|
+
# Avoids downloading the full image by parsing only the header bytes.
|
|
22
|
+
class FastimageAnalyzer
  extend T::Sig
  include ImageAnalyzer

  # Asks FastImage for the size of the image at +url+.
  # Both values are nil when FastImage returns nothing or raises.
  sig { override.params(url: String).returns(T::Hash[Symbol, T.nilable(Integer)]) }
  def dimensions(url)
    # Destructuring a nil result leaves both locals nil.
    width, height = FastImage.size(url)
    { width: width, height: height }
  rescue StandardError
    { width: nil, height: nil }
  end
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# typed: strict
|
|
3
|
+
|
|
4
|
+
require 'logger'
|
|
5
|
+
require 'sorbet-runtime'
|
|
6
|
+
|
|
7
|
+
module BrandLogo
|
|
8
|
+
# Centralized logging for the gem.
|
|
9
|
+
# Replaces the DebugLogger module's debug boolean pattern.
|
|
10
|
+
#
|
|
11
|
+
# Usage:
|
|
12
|
+
# BrandLogo::Logging.logger.level = Logger::DEBUG # enable verbose output
|
|
13
|
+
# BrandLogo::Logging.logger = MyCustomLogger.new # inject custom logger
|
|
14
|
+
module Logging
  extend T::Sig

  # Module-level logger; writes to $stderr at WARN level until replaced.
  @logger = T.let(::Logger.new($stderr, level: ::Logger::WARN), ::Logger)

  class << self
    extend T::Sig

    # The logger currently used by the gem.
    sig { returns(::Logger) }
    def logger
      @logger
    end

    # Replaces the logger used by the gem.
    sig { params(logger: ::Logger).void }
    def logger=(logger)
      @logger = logger
    end
  end
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# typed: strict
|
|
3
|
+
|
|
4
|
+
require 'sorbet-runtime'
|
|
5
|
+
|
|
6
|
+
module BrandLogo
|
|
7
|
+
module Strategies
|
|
8
|
+
# Abstract base class implementing the Template Method pattern.
|
|
9
|
+
#
|
|
10
|
+
# Subclasses implement `fetch_all` (the customization point).
|
|
11
|
+
# `fetch` is the common algorithm: fetch_all → filter valid → select best.
|
|
12
|
+
# This eliminates duplicated selection/validation logic across strategies.
|
|
13
|
+
class BaseStrategy
  extend T::Sig
  extend T::Helpers

  abstract!

  # Score given to icons whose size is unknown; it is lower than any
  # width * height product, so measured icons always win the ranking.
  UNKNOWN_DIMENSION_SCORE = T.let(-1, Integer)

  sig { params(config: Config).void }
  def initialize(config:)
    @config = T.let(config, Config)
  end

  # Returns ALL icons found for the domain (unfiltered).
  # Subclasses must implement this method.
  sig { abstract.params(domain: String).returns(T::Array[Icon]) }
  def fetch_all(domain); end

  # Returns the best valid icon for the domain, or nil if none is found.
  # Runs fetch_all, keeps the icons accepted by valid_icon?, then picks
  # the best of those.
  sig { params(domain: String).returns(T.nilable(Icon)) }
  def fetch(domain)
    valid_icons = fetch_all(domain).select { |icon| valid_icon?(icon) }
    select_best_icon(valid_icons)
  end

  protected

  # An icon is valid when both its dimensions and its format are accepted.
  sig { params(icon: Icon).returns(T::Boolean) }
  def valid_icon?(icon)
    valid_dimensions?(icon) && valid_format?(icon)
  end

  private

  # Allowed SVGs and icons of unknown size bypass the size limits;
  # everything else must fit between the configured minimum and maximum.
  sig { params(icon: Icon).returns(T::Boolean) }
  def valid_dimensions?(icon)
    return true if svg_icon?(icon)
    return true if missing_dimensions?(icon)

    meets_minimum_dimensions?(icon) && within_maximum_dimensions?(icon)
  end

  # Non-SVG formats are always accepted; SVG only when the config allows it.
  sig { params(icon: Icon).returns(T::Boolean) }
  def valid_format?(icon)
    return true if icon.format != 'svg'

    @config.allow_svg
  end

  # Prefers the first SVG when SVGs are allowed, otherwise the icon with
  # the highest score. Returns nil for an empty list.
  sig { params(icons: T::Array[Icon]).returns(T.nilable(Icon)) }
  def select_best_icon(icons)
    return nil if icons.empty?

    if @config.allow_svg
      svg_icon = icons.find { |icon| icon.format == 'svg' }
      return svg_icon if svg_icon
    end

    icons.max_by { |icon| icon_score(icon) }
  end

  # Pixel area of the icon, or UNKNOWN_DIMENSION_SCORE when a side is nil.
  sig { params(icon: Icon).returns(Integer) }
  def icon_score(icon)
    width = icon.dimensions[:width]
    height = icon.dimensions[:height]
    return UNKNOWN_DIMENSION_SCORE if width.nil? || height.nil?

    width * height
  end

  sig { params(icon: Icon).returns(T::Boolean) }
  def svg_icon?(icon)
    @config.allow_svg && icon.format == 'svg'
  end

  sig { params(icon: Icon).returns(T::Boolean) }
  def missing_dimensions?(icon)
    icon.dimensions[:width].nil? || icon.dimensions[:height].nil?
  end

  # True when both sides reach the configured minimum.
  # A side missing from the minimum hash is treated as 0 (no lower bound)
  # so a partially specified hash cannot raise on a nil comparison.
  sig { params(icon: Icon).returns(T::Boolean) }
  def meets_minimum_dimensions?(icon)
    width = icon.dimensions[:width]
    height = icon.dimensions[:height]
    return false if width.nil? || height.nil?

    min = @config.min_dimensions
    width >= min.fetch(:width, 0) && height >= min.fetch(:height, 0)
  end

  # True when no maximum is configured, when the icon size is unknown, or
  # when both sides stay within the configured maximum.
  # A side missing from the maximum hash is treated as unbounded.
  sig { params(icon: Icon).returns(T::Boolean) }
  def within_maximum_dimensions?(icon)
    max = @config.max_dimensions
    return true if max.nil?

    width = icon.dimensions[:width]
    height = icon.dimensions[:height]
    return true if width.nil? || height.nil?

    max_width = max[:width]
    max_height = max[:height]
    (max_width.nil? || width <= max_width) && (max_height.nil? || height <= max_height)
  end
end
|
|
116
|
+
end
|
|
117
|
+
end
|