earl 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ruby-tests.yml +32 -0
- data/.gitignore +20 -4
- data/.rubocop.yml +35 -0
- data/.rubocop_todo.yml +22 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +15 -3
- data/Guardfile +9 -4
- data/LICENSE +3 -1
- data/{README.rdoc → README.md} +44 -46
- data/Rakefile +7 -32
- data/earl.gemspec +19 -20
- data/lib/earl/earl.rb +63 -49
- data/lib/earl/scraper.rb +16 -17
- data/lib/earl/version.rb +3 -1
- data/lib/earl.rb +6 -0
- data/spec/fixtures/cassettes/feed/is_atom_feed.yml +2298 -0
- data/spec/fixtures/cassettes/feed/is_rss_feed.yml +48 -0
- data/spec/fixtures/cassettes/feed/no_feed.yml +69 -0
- data/spec/fixtures/cassettes/feed/with_atom_and_rss_feed.yml +1471 -0
- data/spec/fixtures/cassettes/feed/with_rss_feed.yml +47 -0
- data/spec/fixtures/cassettes/oembed/no_oembed.yml +101 -0
- data/spec/fixtures/cassettes/oembed/youtube_oembed.yml +129 -0
- data/spec/integration/feed_spec.rb +54 -54
- data/spec/integration/oembed_spec.rb +23 -27
- data/spec/spec_helper.rb +4 -2
- data/spec/support/fixtures.rb +8 -3
- data/spec/support/vcr.rb +9 -0
- data/spec/unit/earl/earl_spec.rb +5 -6
- data/spec/unit/earl/feed_spec.rb +29 -26
- data/spec/unit/earl/oembed_spec.rb +24 -23
- data/spec/unit/earl/scraper_spec.rb +19 -18
- data/spec/unit/earl_spec.rb +39 -30
- metadata +48 -97
- data/.document +0 -5
- data/.rspec +0 -1
- data/.travis.yml +0 -11
- data/Gemfile.lock +0 -60
- data/script/console +0 -10
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: bfb59215c79763794ebf311cff3e939a0be46e4557abf8bbd8c358693b12e1ee
|
4
|
+
data.tar.gz: 550ea900108a04f4d12935bcd7fa398b95c886f59847c2492a7d90b96495f042
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 16102c9eee3ff031411da1aacd80371a7474ea3ff5b70dad9a77ad83efe63a943db7b845e8b461a4c44de7685e8933c903a2c2511c41df00f69759cd06ece54c
|
7
|
+
data.tar.gz: 3b724772932a18d2de4f589959c28968fa9cc93cf158bca8ac4360166ae3b04877e8bb6dc09338329da94fbbccee4891819598f1e0a4a642d858d13362f9219f
|
@@ -0,0 +1,32 @@
|
|
1
|
+
name: Ruby Tests
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches:
|
6
|
+
- main
|
7
|
+
pull_request:
|
8
|
+
branches:
|
9
|
+
- main
|
10
|
+
|
11
|
+
jobs:
|
12
|
+
test:
|
13
|
+
runs-on: ubuntu-latest
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- name: Checkout code
|
17
|
+
uses: actions/checkout@v3
|
18
|
+
|
19
|
+
- name: Set up Ruby
|
20
|
+
uses: ruby/setup-ruby@v1
|
21
|
+
with:
|
22
|
+
ruby-version: 3.3
|
23
|
+
bundler-cache: true
|
24
|
+
|
25
|
+
- name: Install dependencies
|
26
|
+
run: bundle install
|
27
|
+
|
28
|
+
- name: Run tests
|
29
|
+
run: bundle exec rspec
|
30
|
+
|
31
|
+
- name: Run rubocop
|
32
|
+
run: bundle exec rubocop
|
data/.gitignore
CHANGED
@@ -1,6 +1,22 @@
|
|
1
|
-
*.
|
2
|
-
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
3
9
|
coverage
|
4
|
-
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
5
12
|
pkg
|
6
|
-
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.bundle
|
19
|
+
*.so
|
20
|
+
*.o
|
21
|
+
*.a
|
22
|
+
mkmf.log
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
AllCops:
|
4
|
+
NewCops: disable
|
5
|
+
SuggestExtensions: false
|
6
|
+
TargetRubyVersion: 3.0
|
7
|
+
|
8
|
+
Layout/LineLength:
|
9
|
+
Max: 199
|
10
|
+
|
11
|
+
Metrics/BlockLength:
|
12
|
+
Max: 63
|
13
|
+
|
14
|
+
Metrics/ClassLength:
|
15
|
+
Max: 200
|
16
|
+
|
17
|
+
Metrics/CyclomaticComplexity:
|
18
|
+
Max: 8
|
19
|
+
|
20
|
+
Metrics/MethodLength:
|
21
|
+
Max: 20
|
22
|
+
|
23
|
+
Style/ClassVars:
|
24
|
+
Exclude:
|
25
|
+
- 'lib/earl/scraper.rb'
|
26
|
+
|
27
|
+
Style/Documentation:
|
28
|
+
Exclude:
|
29
|
+
- 'spec/**/*'
|
30
|
+
|
31
|
+
Style/ClassAndModuleChildren:
|
32
|
+
EnforcedStyleForClasses: compact
|
33
|
+
|
34
|
+
Layout/FirstHashElementIndentation:
|
35
|
+
EnforcedStyle: consistent
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2025-07-29 10:15:32 UTC using RuboCop version 1.79.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 7
|
10
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
11
|
+
# Configuration parameters: AllowSafeAssignment.
|
12
|
+
Lint/AssignmentInCondition:
|
13
|
+
Exclude:
|
14
|
+
- 'lib/earl/earl.rb'
|
15
|
+
- 'lib/earl/scraper.rb'
|
16
|
+
|
17
|
+
# Offense count: 2
|
18
|
+
# Configuration parameters: AllowedMethods.
|
19
|
+
# AllowedMethods: enums
|
20
|
+
Lint/ConstantDefinitionInBlock:
|
21
|
+
Exclude:
|
22
|
+
- 'spec/unit/earl/scraper_spec.rb'
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
earl
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.3.3
|
data/Gemfile
CHANGED
@@ -1,4 +1,16 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
source 'https://rubygems.org'
|
4
|
+
|
5
|
+
# Specify gem dependencies in earl.gemspec
|
6
|
+
gemspec
|
7
|
+
|
8
|
+
# development dependencies
|
9
|
+
gem 'bundler', '>= 2.2.33'
|
10
|
+
gem 'guard-rspec'
|
11
|
+
gem 'guard-rubocop'
|
12
|
+
gem 'rake'
|
13
|
+
gem 'rspec'
|
14
|
+
gem 'rubocop', require: false
|
15
|
+
gem 'vcr'
|
16
|
+
gem 'webmock'
|
data/Guardfile
CHANGED
@@ -1,10 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# A sample Guardfile
|
2
4
|
# More info at https://github.com/guard/guard#readme
|
3
5
|
|
4
|
-
guard
|
6
|
+
guard :rspec, cmd: 'bundle exec rspec' do
|
5
7
|
watch(%r{^spec/.+_spec\.rb$})
|
6
|
-
watch('spec/spec_helper.rb')
|
7
|
-
watch(%r{^lib/(.+)\.rb$})
|
8
|
-
|
8
|
+
watch('spec/spec_helper.rb') { 'spec' }
|
9
|
+
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/unit/#{m[1]}_spec.rb" }
|
9
10
|
end
|
10
11
|
|
12
|
+
guard :rubocop, cli: ['--display-cop-names'] do
|
13
|
+
watch(/.+\.rb$/)
|
14
|
+
watch(%r{(?:.+/)?\.rubocop(?:_todo)?\.yml$}) { |m| File.dirname(m[0]) }
|
15
|
+
end
|
data/LICENSE
CHANGED
data/{README.rdoc → README.md}
RENAMED
@@ -1,20 +1,20 @@
|
|
1
|
-
|
1
|
+
# Earl
|
2
2
|
|
3
3
|
Earl wants to help you scrape all the relevant metadata for your favorite web pages so you can be as cool as
|
4
4
|
Facebook when displaying user-submitted link content. Earl returns details like titles, descriptions, content type,
|
5
5
|
associated feeds, and OEmbed definitions if available.
|
6
6
|
|
7
|
-
Earl is based on an original source project called _earl_ by
|
8
|
-
The revamp was done by
|
9
|
-
available at https://github.com/evendis/earl
|
7
|
+
Earl is based on an original source project called _earl_ by [teejayvanslyke](https://github.com/teejayvanslyke/earl) (but never released as a gem).
|
8
|
+
The revamp was done by [Paul Gallagher](https://github.com/tardate), and master source is currently
|
9
|
+
available at <https://github.com/evendis/earl>.
|
10
10
|
|
11
|
-
The Earl gem is officially named _earl_. Big thanks go to
|
11
|
+
The Earl gem is officially named _earl_. Big thanks go to [jeremyruppel](https://github.com/jeremyruppel) who
|
12
12
|
contributed the ownership of the _earl_ gem name. The original _earl_ gem had a somewhat similar purpose - it is now defunct, but still available up to version 0.3.0 via rubygems. Any _earl_ gem with version 1.0.0 or higher is the new gem release (and is in no way backward compatible with
|
13
13
|
earlier versions).
|
14
14
|
|
15
|
-
|
15
|
+
## The Earl Cookbook
|
16
16
|
|
17
|
-
|
17
|
+
### How do I instantiate Earl?
|
18
18
|
|
19
19
|
Pass any url-like string to Earl:
|
20
20
|
|
@@ -23,8 +23,7 @@ Pass any url-like string to Earl:
|
|
23
23
|
# or using the [] convenience method:
|
24
24
|
my_earl_instance = Earl['https://github.com/evendis/earl']
|
25
25
|
|
26
|
-
|
27
|
-
=== How do I inspect details of the page?
|
26
|
+
### How do I inspect details of the page?
|
28
27
|
|
29
28
|
earl = Earl['https://github.com/evendis/earl']
|
30
29
|
earl.title
|
@@ -34,31 +33,39 @@ Pass any url-like string to Earl:
|
|
34
33
|
earl.image
|
35
34
|
=> "https://a248.e.akamai.net/assets.github.com/images/modules/header/logov7@4x.png?1340935010"
|
36
35
|
|
37
|
-
|
36
|
+
### How do I get oembed details for a link?
|
38
37
|
|
39
38
|
Earl will get oembed details for a link if they are available.
|
40
39
|
|
41
|
-
earl = Earl['
|
40
|
+
earl = Earl['https://www.youtube.com/watch?v=hNSkCqMUMQA']
|
42
41
|
earl.oembed
|
43
|
-
=> {
|
42
|
+
=> {:title=>"[JA][Keynote] Ruby Taught Me About Encoding Under the Hood / Mari Imaizumi @ima1zumi",
|
43
|
+
:author_name=>"RubyKaigi",
|
44
|
+
:author_url=>"https://www.youtube.com/@rubykaigi4884",
|
45
|
+
:type=>"video",
|
46
|
+
:height=>113,
|
47
|
+
:width=>200,
|
48
|
+
:version=>"1.0",
|
49
|
+
:provider_name=>"YouTube",
|
50
|
+
:provider_url=>"https://www.youtube.com/",
|
51
|
+
:thumbnail_height=>360,
|
52
|
+
:thumbnail_width=>480,
|
53
|
+
:thumbnail_url=>"https://i.ytimg.com/vi/hNSkCqMUMQA/hqdefault.jpg",
|
54
|
+
:html=>
|
55
|
+
"<iframe width=\"200\" height=\"113\" src=\"https://www.youtube.com/embed/hNSkCqMUMQA?feature=oembed\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen title=\"[JA][Keynote] Ruby Taught Me About Encoding Under the Hood / Mari Imaizumi @ima1zumi\"></iframe>"}
|
44
56
|
# to get the embed code:
|
45
57
|
earl.oembed_html
|
46
|
-
=> "<iframe width=\"
|
58
|
+
=> "<iframe width=\"200\" height=\"113\" src=\"https://www.youtube.com/embed/hNSkCqMUMQA?feature=oembed\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen title=\"[JA][Keynote] Ruby Taught Me About Encoding Under the Hood / Mari Imaizumi @ima1zumi\"></iframe>"
|
47
59
|
|
48
|
-
|
60
|
+
### How do I customise the oembed link?
|
49
61
|
|
50
62
|
Supported oembed parameters may be provided to `Earl.new` or to the `oembed` call:
|
51
63
|
|
52
|
-
earl = Earl.new('
|
53
|
-
earl.oembed_html
|
54
|
-
=> <iframe width="200" height="150" src="http://www.youtube.com/embed/g3DCEcSlfhw?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>
|
55
|
-
earl = Earl.new('http://www.youtube.com/watch?v=g3DCEcSlfhw')
|
56
|
-
earl.oembed({:maxwidth => "100", :maxheight => "120"})
|
64
|
+
earl = Earl.new('https://www.youtube.com/watch?v=hNSkCqMUMQA', { oembed: { maxwidth: '200', maxheight: '320' }})
|
57
65
|
earl.oembed_html
|
58
|
-
=> <iframe width
|
59
|
-
|
66
|
+
=> "<iframe width=\"200\" height=\"113\" src=\"https://www.youtube.com/embed/hNSkCqMUMQA?feature=oembed\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen title=\"[JA][Keynote] Ruby Taught Me About Encoding Under the Hood / Mari Imaizumi @ima1zumi\"></iframe>"
|
60
67
|
|
61
|
-
|
68
|
+
### How do I inspect what attributes are available for a page?
|
62
69
|
|
63
70
|
To see all of the attributes a URL provides, simply ask:
|
64
71
|
|
@@ -66,8 +73,7 @@ To see all of the attributes a URL provides, simply ask:
|
|
66
73
|
earl.attributes
|
67
74
|
=> [:title, :image, :description, :rss_feed, :atom_feed, :content_type, :base_url, :charset, :content_encoding, :headers, :feed]
|
68
75
|
|
69
|
-
|
70
|
-
=== How can I extend Earl to scrape additional page details?
|
76
|
+
### How can I extend Earl to scrape additional page details?
|
71
77
|
|
72
78
|
Need to scrape additional page details not currently supported by Earl? Implement your own scraper:
|
73
79
|
|
@@ -87,24 +93,23 @@ Your new attribute is now available for use:
|
|
87
93
|
Earl['http://www.quotationspage.com/qotd.html'].qotd
|
88
94
|
=> "Love is a snowmobile racing across the tundra and then suddenly it flips over, pinning you underneath. At night, the ice weasels come."
|
89
95
|
|
90
|
-
|
91
|
-
=== How do I install it for normal use?
|
96
|
+
### How do I install it for normal use?
|
92
97
|
|
93
98
|
If using bundler, add `gem 'earl'` to your application's Gemfile and run `bundle`.
|
94
99
|
|
95
100
|
Or install it from the command-line:
|
96
101
|
|
97
|
-
|
102
|
+
gem install earl
|
98
103
|
|
99
|
-
|
104
|
+
### How do I install it for gem development?
|
100
105
|
|
101
106
|
To work on enhancements or fix bugs in Earl, fork and clone the GitHub repository.
|
102
|
-
If you are using bundler (recommended), run
|
107
|
+
If you are using bundler (recommended), run `bundle` to install development dependencies:
|
103
108
|
|
104
|
-
|
105
|
-
|
109
|
+
gem install bundler
|
110
|
+
bundle
|
106
111
|
|
107
|
-
|
112
|
+
### How do I run the tests?
|
108
113
|
|
109
114
|
Once development dependencies are installed, all unit tests are run with just:
|
110
115
|
|
@@ -112,25 +117,18 @@ Once development dependencies are installed, all unit tests are run with just:
|
|
112
117
|
# or..
|
113
118
|
$ rake spec
|
114
119
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
$ rake spec:integration
|
119
|
-
|
120
|
-
To run all tests (unit and integration):
|
121
|
-
|
122
|
-
$ rake spec:all
|
123
|
-
|
120
|
+
VCR is used to record integration tests. To re-record sessions, delete the corresponding cassette in
|
121
|
+
[spec/fixtures/cassettes](./spec/fixtures/cassettes/).
|
124
122
|
|
125
|
-
|
123
|
+
### How do I automatically run tests when I modify files?
|
126
124
|
|
127
125
|
Guard is installed as part of the development dependencies. Start a guard process in a terminal window:
|
128
126
|
|
129
|
-
|
127
|
+
bundle exec guard
|
130
128
|
|
131
129
|
It will run all the tests to start with by default. Then whenever you change a file, the associated tests will execute in this terminal window.
|
132
130
|
|
133
|
-
|
131
|
+
## Contributing to Earl
|
134
132
|
|
135
133
|
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
136
134
|
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
|
@@ -140,6 +138,6 @@ It will run all the tests to start with by default. Then whenever you change a f
|
|
140
138
|
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
141
139
|
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
142
140
|
|
143
|
-
|
141
|
+
## Copyright
|
144
142
|
|
145
|
-
See LICENSE for details.
|
143
|
+
See [LICENSE](./LICENSE) for details.
|
data/Rakefile
CHANGED
@@ -1,38 +1,13 @@
|
|
1
|
-
|
2
|
-
require "bundler/gem_tasks"
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
require '
|
3
|
+
require 'bundler/gem_tasks'
|
5
4
|
require 'rspec/core/rake_task'
|
6
5
|
|
7
|
-
|
8
|
-
RSpec::Core::RakeTask.new do |t|
|
9
|
-
t.rspec_opts = ["-c", "-f progress"]
|
10
|
-
t.pattern = 'spec/unit/**/*_spec.rb'
|
11
|
-
end
|
12
|
-
|
13
|
-
desc "Run only integration test examples"
|
14
|
-
RSpec::Core::RakeTask.new(:'spec:integration') do |t|
|
15
|
-
t.rspec_opts = ["-c", "-f progress"]
|
16
|
-
t.pattern = 'spec/integration/**/*_spec.rb'
|
17
|
-
end
|
18
|
-
|
19
|
-
desc "Run all test examples including integration tests"
|
20
|
-
RSpec::Core::RakeTask.new(:'spec:all') do |t|
|
21
|
-
t.rspec_opts = ["-c", "-f progress"]
|
22
|
-
t.pattern = 'spec/**/*_spec.rb'
|
23
|
-
end
|
24
|
-
|
25
|
-
task :default => :spec
|
6
|
+
RSpec::Core::RakeTask.new(:spec)
|
26
7
|
|
27
|
-
|
28
|
-
RDoc::Task.new do |rdoc|
|
29
|
-
rdoc.main = "README.rdoc"
|
30
|
-
rdoc.rdoc_dir = 'rdoc'
|
31
|
-
rdoc.title = "earl"
|
32
|
-
rdoc.rdoc_files.include('README*', 'lib/**/*.rb')
|
33
|
-
end
|
8
|
+
task default: :spec
|
34
9
|
|
35
|
-
desc
|
10
|
+
desc 'Open an irb session preloaded with this library'
|
36
11
|
task :console do
|
37
|
-
sh
|
38
|
-
end
|
12
|
+
sh 'irb -I lib -r earl.rb'
|
13
|
+
end
|
data/earl.gemspec
CHANGED
@@ -1,27 +1,26 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'English'
|
4
|
+
lib = File.expand_path('lib', __dir__)
|
5
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
6
|
+
require 'earl/version'
|
3
7
|
|
4
8
|
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = [
|
6
|
-
gem.email = [
|
7
|
-
gem.description =
|
8
|
-
gem.summary =
|
9
|
-
gem.homepage =
|
9
|
+
gem.authors = ['teejayvanslyke', 'Paul Gallagher']
|
10
|
+
gem.email = ['tj@elctech.com', 'gallagher.paul@gmail.com']
|
11
|
+
gem.description = 'URL metadata API'
|
12
|
+
gem.summary = 'URL metadata API for scraping titles, descriptions, images, and videos from URLs'
|
13
|
+
gem.homepage = 'https://github.com/evendis/earl'
|
10
14
|
|
11
|
-
gem.files = `git ls-files`.split(
|
12
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
15
|
+
gem.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR)
|
16
|
+
gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
|
13
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
14
|
-
gem.name =
|
15
|
-
gem.require_paths = [
|
18
|
+
gem.name = 'earl'
|
19
|
+
gem.require_paths = ['lib']
|
16
20
|
gem.version = Earl::VERSION
|
21
|
+
gem.license = 'MIT'
|
17
22
|
|
18
|
-
gem.
|
19
|
-
gem.add_runtime_dependency
|
20
|
-
gem.
|
21
|
-
gem.add_development_dependency(%q<rake>, ["~> 0.9.2.2"])
|
22
|
-
gem.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
|
23
|
-
gem.add_development_dependency(%q<rdoc>, ["~> 3.11"])
|
24
|
-
gem.add_development_dependency(%q<guard-rspec>, ["~> 1.2.0"])
|
25
|
-
gem.add_development_dependency(%q<rb-fsevent>, ["~> 0.9.1"])
|
26
|
-
|
23
|
+
gem.required_ruby_version = '>= 3.0'
|
24
|
+
gem.add_runtime_dependency 'nokogiri', '~> 1.18'
|
25
|
+
gem.add_runtime_dependency 'ruby-oembed', '~> 0.18.1'
|
27
26
|
end
|
data/lib/earl/earl.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
|
2
|
-
require 'oembedr'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
3
|
+
# Earl is a class that represents a URL and provides methods to fetch metadata about the page
|
4
4
|
class Earl
|
5
|
+
attr_accessor :url, :options
|
6
|
+
attr_writer :oembed
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
-
def initialize(url, options={})
|
8
|
+
def initialize(url, options = {})
|
9
9
|
@url = url
|
10
10
|
@options = options
|
11
11
|
end
|
@@ -19,7 +19,7 @@ class Earl
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def uri_response
|
22
|
-
@uri_response ||= open
|
22
|
+
@uri_response ||= uri.open
|
23
23
|
end
|
24
24
|
|
25
25
|
# Returns
|
@@ -36,22 +36,22 @@ class Earl
|
|
36
36
|
when :headers
|
37
37
|
uri_response_attribute(:meta)
|
38
38
|
else
|
39
|
-
uri_response
|
39
|
+
uri_response.respond_to?(name) && uri_response.send(name)
|
40
40
|
end
|
41
41
|
end
|
42
42
|
protected :uri_response_attribute
|
43
43
|
|
44
44
|
def uri_response_attributes
|
45
|
-
[
|
45
|
+
%i[content_type base_url charset content_encoding headers]
|
46
46
|
end
|
47
47
|
protected :uri_response_attributes
|
48
48
|
|
49
49
|
def scraper
|
50
|
-
@scraper ||= Scraper.for(url,self)
|
50
|
+
@scraper ||= Scraper.for(url, self)
|
51
51
|
end
|
52
52
|
|
53
53
|
def response
|
54
|
-
scraper
|
54
|
+
scraper&.response
|
55
55
|
end
|
56
56
|
|
57
57
|
# Returns a hash of link meta data, including:
|
@@ -60,23 +60,28 @@ class Earl
|
|
60
60
|
def metadata
|
61
61
|
data = oembed || {}
|
62
62
|
attributes.each do |attribute|
|
63
|
-
if attribute_value =
|
63
|
+
if attribute_value = send(attribute)
|
64
64
|
data[attribute] ||= attribute_value
|
65
65
|
end
|
66
66
|
end
|
67
67
|
data
|
68
68
|
end
|
69
69
|
|
70
|
+
def respond_to_missing?(name, include_private)
|
71
|
+
uri_response_attributes.include?(name) || scraper&.attribute?(name) || super
|
72
|
+
end
|
73
|
+
|
70
74
|
# Dispatch missing methods if a match for:
|
71
75
|
# - uri_response_attributes
|
72
76
|
# - scraper attributes
|
73
77
|
def method_missing(method, *args)
|
74
78
|
if uri_response_attributes.include?(method)
|
75
|
-
|
76
|
-
elsif scraper
|
77
|
-
|
79
|
+
uri_response_attribute(method)
|
80
|
+
elsif scraper&.attribute?(method)
|
81
|
+
scraper.attribute(method)
|
82
|
+
else
|
83
|
+
super
|
78
84
|
end
|
79
|
-
super
|
80
85
|
end
|
81
86
|
|
82
87
|
# Returns a full array of attributes available for the link
|
@@ -86,52 +91,66 @@ class Earl
|
|
86
91
|
|
87
92
|
# Returns the options to be used for oembed
|
88
93
|
def oembed_options
|
89
|
-
{ :
|
90
|
-
end
|
91
|
-
|
92
|
-
# Returns the oembed meta data hash for the URL (or nil if not defined/available)
|
93
|
-
#
|
94
|
-
#
|
95
|
-
#
|
96
|
-
#
|
97
|
-
#
|
98
|
-
#
|
99
|
-
#
|
100
|
-
#
|
101
|
-
#
|
102
|
-
#
|
103
|
-
#
|
104
|
-
#
|
105
|
-
#
|
106
|
-
#
|
107
|
-
#
|
108
|
-
#
|
94
|
+
{ maxwidth: '560', maxheight: '315' }.merge(options[:oembed] || {})
|
95
|
+
end
|
96
|
+
|
97
|
+
# Returns the oembed meta data hash for the URL (or nil if not defined/available)
|
98
|
+
# e.g. for https://www.youtube.com/watch?v=hNSkCqMUMQA:
|
99
|
+
# {
|
100
|
+
# :title=>"[JA][Keynote] Ruby Taught Me About Encoding Under the Hood / Mari Imaizumi @ima1zumi",
|
101
|
+
# :author_name=>"RubyKaigi",
|
102
|
+
# :author_url=>"https://www.youtube.com/@rubykaigi4884",
|
103
|
+
# :type=>"video",
|
104
|
+
# :height=>113,
|
105
|
+
# :width=>200,
|
106
|
+
# :version=>"1.0",
|
107
|
+
# :provider_name=>"YouTube",
|
108
|
+
# :provider_url=>"https://www.youtube.com/",
|
109
|
+
# :thumbnail_height=>360,
|
110
|
+
# :thumbnail_width=>480,
|
111
|
+
# :thumbnail_url=>"https://i.ytimg.com/vi/hNSkCqMUMQA/hqdefault.jpg",
|
112
|
+
# :html=> "<iframe width=\"200\" height=\"113\" src=\"https://www.youtube.com/embed/hNSkCqMUMQA?feature=oembed\" \
|
113
|
+
# frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" \
|
114
|
+
# referrerpolicy=\"strict-origin-when-cross-origin\" allowfullscreen \
|
115
|
+
# title=\"[JA][Keynote] Ruby Taught Me About Encoding Under the Hood / Mari Imaizumi @ima1zumi\"></iframe>"
|
116
|
+
# }
|
109
117
|
#
|
110
118
|
# +options+ defines a custom oembed options hash and will cause a re-fetch of the oembed metadata
|
111
|
-
def oembed(options=nil)
|
119
|
+
def oembed(options = nil)
|
112
120
|
if options # use custom options, refetch oembed metadata
|
113
121
|
@options[:oembed] = options
|
114
122
|
@oembed = nil
|
115
123
|
end
|
116
|
-
begin
|
117
|
-
|
124
|
+
@oembed ||= begin
|
125
|
+
h = fetch_oembed(base_url).fields
|
126
|
+
if h
|
118
127
|
h.keys.each do |key| # symbolize_keys!
|
119
|
-
|
128
|
+
new_key = begin
|
129
|
+
key.to_sym
|
130
|
+
rescue StandardError
|
131
|
+
key
|
132
|
+
end
|
133
|
+
h[new_key] = h.delete(key)
|
120
134
|
end
|
121
135
|
h
|
122
136
|
end
|
123
|
-
rescue
|
137
|
+
rescue StandardError
|
138
|
+
nil
|
124
139
|
end
|
125
|
-
@oembed
|
126
140
|
end
|
127
141
|
|
142
|
+
def fetch_oembed(base_url)
|
143
|
+
OEmbed::Providers.get(base_url)
|
144
|
+
end
|
145
|
+
protected :fetch_oembed
|
146
|
+
|
128
147
|
# Returns the oembed code for the url (or nil if not defined/available)
|
129
148
|
def oembed_html
|
130
149
|
oembed && oembed[:html]
|
131
150
|
end
|
132
151
|
|
133
152
|
# Returns true if there is an ATOM or RSS feed associated with this URL.
|
134
|
-
def
|
153
|
+
def feed?
|
135
154
|
!feed.nil?
|
136
155
|
end
|
137
156
|
|
@@ -147,12 +166,7 @@ class Earl
|
|
147
166
|
end
|
148
167
|
end
|
149
168
|
|
150
|
-
|
151
|
-
|
152
|
-
def [](url)
|
153
|
-
new(url)
|
154
|
-
end
|
155
|
-
|
169
|
+
def self.[](url)
|
170
|
+
new(url)
|
156
171
|
end
|
157
|
-
|
158
172
|
end
|