coelacanth 0.2.3 → 0.3.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7d92c04c82f120570d29b58787a36897bb769d789dfd5ecbf5f7da725c08710a
4
- data.tar.gz: d7e25b8f1eb0e269dec2d6cf8c0c9209693b2f068ebf389ee61f3450fe7db3b2
3
+ metadata.gz: dc0f98072fb81b09973a8acbee60d8f9b34c31cdb75e161f7d2f1e8f2bd9ee3c
4
+ data.tar.gz: fba2700643da179439dc2588717bb78b06fb6d9ef056c262ae792084e81ab4eb
5
5
  SHA512:
6
- metadata.gz: 432f908bf812f69f6398e0346d6f7a104391f56c1057adaa14de06457f2512ddd695b58f2b96ad90ca9063ea533bc54429f4b199c5dea541901dc2839c645143
7
- data.tar.gz: 1fdbe4cbe2d354d15d04fb2ba4fd3fd04f2f6c966d91123f443b75831d28aa977c691864bb3d84e1a6922878dbc0120d88d1cdc54ae0390f3052e6331f7dd12f
6
+ metadata.gz: 0a7c6bd212ba78c390798d9b522f5f3f5927941a95eec56ec20e78c0d4744756cd8b3a5cc8eada99f8e5f4539309785d7b8dec1c6ab0454201450e700063016f
7
+ data.tar.gz: 7144aaf89b5ad33ebb8858e866c639c70da8aaffa9d028b7fe898d4fdf0c4d2cb65c4793d96af8bfa4cc2c595b1cb4559927dceda1368323b8d98cfb215363fb
data/Gemfile CHANGED
@@ -5,8 +5,14 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in coelacanth.gemspec
6
6
  gemspec
7
7
 
8
- gem "ferrum", "~> 0.15"
8
+ gem "ferrum", "~> 0.16"
9
9
  gem "rake", "~> 13.2"
10
10
  gem "rspec", "~> 3.0"
11
11
  gem "rubocop", "~> 1.21"
12
12
  gem "oga", "~> 3.4"
13
+ gem "base64", "~> 0.2.0"
14
+
15
+ group :development, :test do
16
+ gem "webmock", "~> 3.25"
17
+ end
18
+
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- coelacanth (0.2.3)
4
+ coelacanth (0.3.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -9,33 +9,43 @@ GEM
9
9
  addressable (2.8.7)
10
10
  public_suffix (>= 2.0.2, < 7.0)
11
11
  ansi (1.5.0)
12
- ast (2.4.2)
13
- concurrent-ruby (1.3.4)
14
- diff-lcs (1.5.1)
15
- ferrum (0.15)
12
+ ast (2.4.3)
13
+ base64 (0.2.0)
14
+ bigdecimal (3.1.9)
15
+ concurrent-ruby (1.3.5)
16
+ crack (1.0.0)
17
+ bigdecimal
18
+ rexml
19
+ diff-lcs (1.6.1)
20
+ ferrum (0.16)
16
21
  addressable (~> 2.5)
22
+ base64 (~> 0.2)
17
23
  concurrent-ruby (~> 1.1)
18
24
  webrick (~> 1.7)
19
25
  websocket-driver (~> 0.7)
20
- json (2.7.2)
21
- language_server-protocol (3.17.0.3)
26
+ hashdiff (1.1.2)
27
+ json (2.10.2)
28
+ language_server-protocol (3.17.0.4)
29
+ lint_roller (1.1.0)
22
30
  oga (3.4)
23
31
  ast
24
32
  ruby-ll (~> 2.1)
25
- parallel (1.26.3)
26
- parser (3.3.5.0)
33
+ parallel (1.27.0)
34
+ parser (3.3.8.0)
27
35
  ast (~> 2.4.1)
28
36
  racc
37
+ prism (1.4.0)
29
38
  public_suffix (6.0.1)
30
39
  racc (1.8.1)
31
40
  rainbow (3.1.1)
32
41
  rake (13.2.1)
33
- regexp_parser (2.9.2)
42
+ regexp_parser (2.10.0)
43
+ rexml (3.4.1)
34
44
  rspec (3.13.0)
35
45
  rspec-core (~> 3.13.0)
36
46
  rspec-expectations (~> 3.13.0)
37
47
  rspec-mocks (~> 3.13.0)
38
- rspec-core (3.13.2)
48
+ rspec-core (3.13.3)
39
49
  rspec-support (~> 3.13.0)
40
50
  rspec-expectations (3.13.3)
41
51
  diff-lcs (>= 1.2.0, < 2.0)
@@ -43,26 +53,35 @@ GEM
43
53
  rspec-mocks (3.13.2)
44
54
  diff-lcs (>= 1.2.0, < 2.0)
45
55
  rspec-support (~> 3.13.0)
46
- rspec-support (3.13.1)
47
- rubocop (1.67.0)
56
+ rspec-support (3.13.2)
57
+ rubocop (1.75.3)
48
58
  json (~> 2.3)
49
- language_server-protocol (>= 3.17.0)
59
+ language_server-protocol (~> 3.17.0.2)
60
+ lint_roller (~> 1.1.0)
50
61
  parallel (~> 1.10)
51
62
  parser (>= 3.3.0.2)
52
63
  rainbow (>= 2.2.2, < 4.0)
53
- regexp_parser (>= 2.4, < 3.0)
54
- rubocop-ast (>= 1.32.2, < 2.0)
64
+ regexp_parser (>= 2.9.3, < 3.0)
65
+ rubocop-ast (>= 1.44.0, < 2.0)
55
66
  ruby-progressbar (~> 1.7)
56
- unicode-display_width (>= 2.4.0, < 3.0)
57
- rubocop-ast (1.32.3)
58
- parser (>= 3.3.1.0)
67
+ unicode-display_width (>= 2.4.0, < 4.0)
68
+ rubocop-ast (1.44.1)
69
+ parser (>= 3.3.7.2)
70
+ prism (~> 1.4)
59
71
  ruby-ll (2.1.3)
60
72
  ansi
61
73
  ast
62
74
  ruby-progressbar (1.13.0)
63
- unicode-display_width (2.6.0)
64
- webrick (1.8.2)
65
- websocket-driver (0.7.6)
75
+ unicode-display_width (3.1.4)
76
+ unicode-emoji (~> 4.0, >= 4.0.4)
77
+ unicode-emoji (4.0.4)
78
+ webmock (3.25.1)
79
+ addressable (>= 2.8.0)
80
+ crack (>= 0.3.2)
81
+ hashdiff (>= 0.4.0, < 2.0.0)
82
+ webrick (1.9.1)
83
+ websocket-driver (0.7.7)
84
+ base64
66
85
  websocket-extensions (>= 0.1.0)
67
86
  websocket-extensions (0.1.5)
68
87
 
@@ -71,12 +90,14 @@ PLATFORMS
71
90
  x86_64-linux
72
91
 
73
92
  DEPENDENCIES
93
+ base64 (~> 0.2.0)
74
94
  coelacanth!
75
- ferrum (~> 0.15)
95
+ ferrum (~> 0.16)
76
96
  oga (~> 3.4)
77
97
  rake (~> 13.2)
78
98
  rspec (~> 3.0)
79
99
  rubocop (~> 1.21)
100
+ webmock (~> 3.25)
80
101
 
81
102
  BUNDLED WITH
82
- 2.5.17
103
+ 2.6.7
data/README.md CHANGED
@@ -53,6 +53,12 @@ url = "https://example.com"
53
53
  stats = Coelacanth.analyze(url)
54
54
  ```
55
55
 
56
+ - rspec
57
+
58
+ ```
59
+ $ bundle exec rspec
60
+ ```
61
+
56
62
  ## Features
57
63
  - Get dom by oga
58
64
  - Get screenshot
@@ -1,10 +1,13 @@
1
1
  development: &development
2
+ client: "ferrum" # Options: "ferrum", "screenshot_one"
2
3
  remote_client:
3
4
  ws_url: "ws://chrome:3000/chrome"
4
5
  timeout: 10 # seconds
5
6
  headers:
6
7
  Authorization: "Bearer 1234567890"
7
8
  User-Agent: "Coelacanth Chrome Extension"
9
+ screenshot_one:
10
+ key: "your_screenshot_one_api_key_here"
8
11
  test:
9
12
  <<: *development
10
13
  production:
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+
5
+ module Coelacanth::Client
6
+ # Coelacanth::Client
7
+ class Base
8
+ def initialize(url, config = Coelacanth.config)
9
+ @validator = Coelacanth::Validator.new
10
+ raise URI::InvalidURIError unless @validator.valid_url?(url)
11
+ @config = config
12
+ @url = url
13
+ end
14
+
15
+ def client
16
+ @config.read("client")
17
+ end
18
+
19
+ def get_response(url = nil)
20
+ raise "Must be implemented in subclass"
21
+ end
22
+
23
+ def get_screenshot
24
+ raise "Must be implemented in subclass"
25
+ end
26
+ end
27
+ end
@@ -2,13 +2,11 @@
2
2
 
3
3
  require "ferrum"
4
4
 
5
- module Coelacanth
5
+ module Coelacanth::Client
6
6
  # Coelacanth::Client
7
- class Client
7
+ class Ferrum < Coelacanth::Client::Base
8
8
  def initialize(url)
9
- @validator = Validator.new
10
- raise URI::InvalidURIError unless @validator.valid_url?(url)
11
- @config = Coelacanth.config
9
+ super(url)
12
10
  remote_client.goto(url)
13
11
  end
14
12
 
@@ -32,7 +30,7 @@ module Coelacanth
32
30
  def remote_client
33
31
  if @remote_client.nil?
34
32
  headers = @config.read("remote_client.headers")
35
- @remote_client = Ferrum::Browser.new(
33
+ @remote_client = ::Ferrum::Browser.new(
36
34
  ws_url: @config.read("remote_client.ws_url"),
37
35
  timeout: @config.read("remote_client.timeout")
38
36
  ).create_page
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open-uri"
4
+ require "ferrum"
5
+
6
+ module Coelacanth::Client
7
+ # Coelacanth::Client
8
+ class ScreenshotOne < Coelacanth::Client::Base
9
+ def get_response
10
+ @origin_response = URI(@url).open
11
+ @status_code = @origin_response.status[0].to_i
12
+ body = @origin_response.read
13
+ body
14
+ rescue OpenURI::HTTPError => e
15
+ @status_code = e.io.status[0].to_i
16
+ raise e
17
+ end
18
+
19
+ def get_screenshot
20
+ api_key = @config.read("screenshot_one.key")
21
+ uri = URI("https://api.screenshotone.com/take")
22
+ params = {
23
+ access_key: api_key,
24
+ url: @url,
25
+ format: "jpg",
26
+ block_ads: true,
27
+ block_cookie_banners: true,
28
+ block_banners_by_heuristics: false,
29
+ block_trackers: true,
30
+ delay: 0,
31
+ timeout: 60,
32
+ response_type: "by_format",
33
+ image_quality: 80
34
+ }
35
+ uri.query = URI.encode_www_form(params)
36
+
37
+ response = Net::HTTP.get_response(uri)
38
+ raise "Failed to fetch screenshot: #{response.code}" unless response.is_a?(Net::HTTPSuccess)
39
+
40
+ response.body
41
+ end
42
+ end
43
+ end
@@ -6,7 +6,7 @@ module Coelacanth
6
6
  # Coelacanth::Dom
7
7
  class Dom
8
8
  def oga(url)
9
- Oga.parse_xml(Client.new(url).get_response)
9
+ Oga.parse_xml(Client::Ferrum.new(url).get_response)
10
10
  end
11
11
  end
12
12
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coelacanth
4
- VERSION = "0.2.3"
4
+ VERSION = "0.3.2"
5
5
  end
data/lib/coelacanth.rb CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  require "net/http"
4
4
  require_relative "coelacanth/configure"
5
- require_relative "coelacanth/client"
5
+ require_relative "coelacanth/client/base"
6
+ require_relative "coelacanth/client/ferrum"
7
+ require_relative "coelacanth/client/screenshot_one"
6
8
  require_relative "coelacanth/dom"
7
9
  require_relative "coelacanth/redirect"
8
10
  require_relative "coelacanth/validator"
@@ -15,7 +17,8 @@ module Coelacanth
15
17
  class DeepRedirectError < StandardError; end
16
18
 
17
19
  def self.analyze(url)
18
- @client = Client.new(url)
20
+ client_class = config.read("client") == "screenshot_one" ? Client::ScreenshotOne : Client::Ferrum
21
+ @client = client_class.new(url)
19
22
  regular_url = Redirect.new.resolve_redirect(url)
20
23
  {
21
24
  dom: Dom.new.oga(regular_url),
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coelacanth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yusuke
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2024-10-23 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies: []
13
12
  description: |
14
13
  coelacanth is a gem that allows you to easily parse and analyze web pages,
@@ -31,7 +30,9 @@ files:
31
30
  - compose.yml
32
31
  - config/coelacanth.yml
33
32
  - lib/coelacanth.rb
34
- - lib/coelacanth/client.rb
33
+ - lib/coelacanth/client/base.rb
34
+ - lib/coelacanth/client/ferrum.rb
35
+ - lib/coelacanth/client/screenshot_one.rb
35
36
  - lib/coelacanth/configure.rb
36
37
  - lib/coelacanth/dom.rb
37
38
  - lib/coelacanth/redirect.rb
@@ -44,7 +45,6 @@ metadata:
44
45
  homepage_uri: https://github.com/slidict/coelacanth
45
46
  source_code_uri: https://github.com/slidict/coelacanth
46
47
  changelog_uri: https://github.com/slidict/coelacanth/releases
47
- post_install_message:
48
48
  rdoc_options: []
49
49
  require_paths:
50
50
  - lib
@@ -59,8 +59,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
59
59
  - !ruby/object:Gem::Version
60
60
  version: '0'
61
61
  requirements: []
62
- rubygems_version: 3.4.1
63
- signing_key:
62
+ rubygems_version: 3.6.7
64
63
  specification_version: 4
65
64
  summary: A gem for analyzing and extracting statistics from web pages.
66
65
  test_files: []