coelacanth 0.1.6 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0172679388ce0d8718122988de86589b623e0ab432f9e08b589dd22e2009f74a'
4
- data.tar.gz: baa0edff8a94564e33cc8596e44cfe49a040dd920798a10bbca824c2ce4d1f89
3
+ metadata.gz: 506450022b367ef2795c9d67d66548b1893757dd740512286fb90f9b952f21d1
4
+ data.tar.gz: df860c4216846549ac335b15dbef4cd9f99aa068a464a47b58ba973ba9d3b771
5
5
  SHA512:
6
- metadata.gz: 16564c8664acf81dddb9188f828ab7e59da5969c610302e64325e6233f928a908e404332b244c48f3b9e0a7e984d88d3713ca885600cd1cd9a6e3057e273630a
7
- data.tar.gz: 543fff73d944bf4b328e0bd3cebf975b9aaa093e2553d36c61ef09cefd1cc76ac997387cebba43580f059d497899006107a6bb0473afc2524f10423fa3cfad54
6
+ metadata.gz: f4f483791381bf5672c74c382c73129703983c936720184cdd870db211cb740bb5ff6685460c6152213eea8dfcce85c6666ffa189d23e3b58e92c183658b486e
7
+ data.tar.gz: 577737cc29a81134a288bb3bee66a220d46f80749a9824ff5b487b62dfe97a8f1209124266715c58d9021e7e88d47e634f675462e9d27e62bd8e3e93c532e31d
data/Gemfile CHANGED
@@ -6,7 +6,7 @@ source "https://rubygems.org"
6
6
  gemspec
7
7
 
8
8
  gem "ferrum", "~> 0.15"
9
- gem "rake", "~> 13.0"
9
+ gem "rake", "~> 13.2"
10
10
  gem "rspec", "~> 3.0"
11
11
  gem "rubocop", "~> 1.21"
12
12
  gem "oga", "~> 3.4"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- coelacanth (0.1.6)
4
+ coelacanth (0.2.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -11,59 +11,57 @@ GEM
11
11
  ansi (1.5.0)
12
12
  ast (2.4.2)
13
13
  concurrent-ruby (1.3.4)
14
- diff-lcs (1.4.4)
14
+ diff-lcs (1.5.1)
15
15
  ferrum (0.15)
16
16
  addressable (~> 2.5)
17
17
  concurrent-ruby (~> 1.1)
18
18
  webrick (~> 1.7)
19
19
  websocket-driver (~> 0.7)
20
- json (2.7.1)
20
+ json (2.7.2)
21
21
  language_server-protocol (3.17.0.3)
22
22
  oga (3.4)
23
23
  ast
24
24
  ruby-ll (~> 2.1)
25
- parallel (1.24.0)
26
- parser (3.3.0.5)
25
+ parallel (1.26.3)
26
+ parser (3.3.5.0)
27
27
  ast (~> 2.4.1)
28
28
  racc
29
29
  public_suffix (6.0.1)
30
- racc (1.7.3)
30
+ racc (1.8.1)
31
31
  rainbow (3.1.1)
32
- rake (13.1.0)
33
- regexp_parser (2.9.0)
34
- rexml (3.2.6)
35
- rspec (3.10.0)
36
- rspec-core (~> 3.10.0)
37
- rspec-expectations (~> 3.10.0)
38
- rspec-mocks (~> 3.10.0)
39
- rspec-core (3.10.1)
40
- rspec-support (~> 3.10.0)
41
- rspec-expectations (3.10.1)
32
+ rake (13.2.1)
33
+ regexp_parser (2.9.2)
34
+ rspec (3.13.0)
35
+ rspec-core (~> 3.13.0)
36
+ rspec-expectations (~> 3.13.0)
37
+ rspec-mocks (~> 3.13.0)
38
+ rspec-core (3.13.2)
39
+ rspec-support (~> 3.13.0)
40
+ rspec-expectations (3.13.3)
42
41
  diff-lcs (>= 1.2.0, < 2.0)
43
- rspec-support (~> 3.10.0)
44
- rspec-mocks (3.10.2)
42
+ rspec-support (~> 3.13.0)
43
+ rspec-mocks (3.13.2)
45
44
  diff-lcs (>= 1.2.0, < 2.0)
46
- rspec-support (~> 3.10.0)
47
- rspec-support (3.10.2)
48
- rubocop (1.62.1)
45
+ rspec-support (~> 3.13.0)
46
+ rspec-support (3.13.1)
47
+ rubocop (1.67.0)
49
48
  json (~> 2.3)
50
49
  language_server-protocol (>= 3.17.0)
51
50
  parallel (~> 1.10)
52
51
  parser (>= 3.3.0.2)
53
52
  rainbow (>= 2.2.2, < 4.0)
54
- regexp_parser (>= 1.8, < 3.0)
55
- rexml (>= 3.2.5, < 4.0)
56
- rubocop-ast (>= 1.31.1, < 2.0)
53
+ regexp_parser (>= 2.4, < 3.0)
54
+ rubocop-ast (>= 1.32.2, < 2.0)
57
55
  ruby-progressbar (~> 1.7)
58
56
  unicode-display_width (>= 2.4.0, < 3.0)
59
- rubocop-ast (1.31.2)
60
- parser (>= 3.3.0.4)
57
+ rubocop-ast (1.32.3)
58
+ parser (>= 3.3.1.0)
61
59
  ruby-ll (2.1.3)
62
60
  ansi
63
61
  ast
64
62
  ruby-progressbar (1.13.0)
65
- unicode-display_width (2.5.0)
66
- webrick (1.8.1)
63
+ unicode-display_width (2.6.0)
64
+ webrick (1.8.2)
67
65
  websocket-driver (0.7.6)
68
66
  websocket-extensions (>= 0.1.0)
69
67
  websocket-extensions (0.1.5)
@@ -76,9 +74,9 @@ DEPENDENCIES
76
74
  coelacanth!
77
75
  ferrum (~> 0.15)
78
76
  oga (~> 3.4)
79
- rake (~> 13.0)
77
+ rake (~> 13.2)
80
78
  rspec (~> 3.0)
81
79
  rubocop (~> 1.21)
82
80
 
83
81
  BUNDLED WITH
84
- 2.5.7
82
+ 2.5.17
data/README.md CHANGED
@@ -50,11 +50,12 @@ Then, you can easily parse and extract information from a web page like this:
50
50
 
51
51
  ```ruby
52
52
  url = "https://example.com"
53
- stats = Coelacanth.analyze(url) # Not yet implement
53
+ stats = Coelacanth.analyze(url)
54
54
  ```
55
55
 
56
56
  ## Features
57
- - More features coming soon!
57
+ - Get dom by oga
58
+ - Get screenshot
58
59
 
59
60
  ## Commit Message Guidelines
60
61
 
data/compose.yml CHANGED
@@ -18,6 +18,5 @@ services:
18
18
  - app-tier
19
19
  chrome:
20
20
  image: browserless/chrome:latest
21
- container_name: chrome
22
21
  networks:
23
22
  - app-tier
@@ -1,5 +1,4 @@
1
1
  development: &development
2
- use_remote_client: false
3
2
  remote_client:
4
3
  ws_url: "ws://chrome:3000/chrome"
5
4
  timeout: 10 # seconds
@@ -1,76 +1,34 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "ferrum"
4
- require "oga"
5
4
 
6
5
  module Coelacanth
7
6
  # Coelacanth::Client
8
7
  class Client
9
- def initialize(url = nil)
8
+ def initialize(url)
9
+ @validator = Validator.new
10
+ raise URI::InvalidURIError unless @validator.valid_url?(url)
10
11
  @config = Coelacanth.config
11
- @url = url if url && valid_url?(url)
12
- end
13
-
14
- def valid_url?(url = nil)
15
- @url = url if url
16
- uri = URI.parse(@url)
17
- uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
18
- rescue URI::InvalidURIError
19
- false
20
- end
21
-
22
- def resolve_redirect(url = nil, limit = 10)
23
- @url = url if url && valid_url?(url)
24
- raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
25
- raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
26
-
27
- get_response(@url)
28
- handle_response(@origin_response, limit)
29
- end
30
-
31
- def oga(url = nil)
32
- @url = url if url && valid_url?(url)
33
- Oga.parse_xml(get_response(@url))
12
+ remote_client.goto(url)
34
13
  end
35
14
 
36
15
  def get_response(url = nil)
37
- @url = url if url && valid_url?(url)
38
- if @config.read("use_remote_client")
39
- response_by_remote_client
40
- else
41
- response_by_net_http
42
- end
43
- end
44
-
45
- private
46
-
47
- def handle_response(response, limit)
48
- codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
49
- case @status_code.to_s
50
- when /^#{codes[Net::HTTPSuccess]}\d\d$/
51
- @url
52
- when /^#{codes[Net::HTTPRedirection]}\d\d$/
53
- @url = response["location"]
54
- resolve_redirect(response["location"], limit - 1)
55
- else
56
- raise Coelacanth::RedirectError
57
- end
58
- end
59
-
60
- def response_by_remote_client
61
- remote_client.goto(@url)
62
16
  @status_code = remote_client.network.status
63
17
  @origin_response = remote_client
64
- remote_client.body
18
+ body = remote_client.body
19
+ page.network.wait_for_idle! # might raise an error
20
+ body
65
21
  end
66
22
 
67
- def response_by_net_http
68
- response = Net::HTTP.get_response(URI.parse(@url))
69
- @status_code = response.code
70
- @origin_response = response
71
- response.body
23
+ def get_screenshot
24
+ tempfile = Tempfile.new
25
+ remote_client.screenshot(path: tempfile.path, format: "png")
26
+ page.network.wait_for_idle! # might raise an error
27
+ File.read(tempfile.path)
72
28
  end
73
29
 
30
+ private
31
+
74
32
  def remote_client
75
33
  if @remote_client.nil?
76
34
  headers = @config.read("remote_client.headers")
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "oga"
4
+
5
+ module Coelacanth
6
+ # Coelacanth::Dom
7
+ class Dom
8
+ def oga(url)
9
+ Oga.parse_xml(Client.new(url).get_response)
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+ require "oga"
5
+
6
+ module Coelacanth
7
+ # Coelacanth::Redirect
8
+ class Redirect
9
+ def resolve_redirect(url, limit = 10)
10
+ @url = url if url && Validator.new.valid_url?(url)
11
+ raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
12
+ raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
13
+
14
+ response = Net::HTTP.get_response(URI.parse(@url))
15
+ @status_code = response.code
16
+ @origin_response = response
17
+
18
+ handle_response(@origin_response, limit)
19
+ end
20
+
21
+ private
22
+
23
+ def handle_response(response, limit)
24
+ codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
25
+ case @status_code.to_s
26
+ when /^#{codes[Net::HTTPSuccess]}\d\d$/
27
+ @url
28
+ when /^#{codes[Net::HTTPRedirection]}\d\d$/
29
+ @url = response["location"]
30
+ resolve_redirect(response["location"], limit - 1)
31
+ else
32
+ raise Coelacanth::RedirectError
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+
5
+ module Coelacanth
6
+ # Coelacanth::Validator
7
+ class Validator
8
+ def valid_url?(url)
9
+ uri = URI.parse(url)
10
+ uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
11
+ rescue URI::InvalidURIError
12
+ false
13
+ end
14
+ end
15
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coelacanth
4
- VERSION = "0.1.6"
4
+ VERSION = "0.2.2"
5
5
  end
data/lib/coelacanth.rb CHANGED
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "net/http"
4
- require_relative "coelacanth/version"
5
4
  require_relative "coelacanth/configure"
6
5
  require_relative "coelacanth/client"
6
+ require_relative "coelacanth/dom"
7
+ require_relative "coelacanth/redirect"
8
+ require_relative "coelacanth/validator"
9
+ require_relative "coelacanth/version"
7
10
 
8
11
  # Coelacanth
9
12
  module Coelacanth
@@ -13,10 +16,10 @@ module Coelacanth
13
16
 
14
17
  def self.analyze(url)
15
18
  @client = Client.new(url)
16
- @client.resolve_redirect
19
+ regular_url = Redirect.new.resolve_redirect(url)
17
20
  {
18
- remote_client: @config.read("use_remote_client"),
19
- oga: @client.oga
21
+ dom: Dom.new.oga(regular_url),
22
+ screenshot: @client.get_screenshot,
20
23
  }
21
24
  end
22
25
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coelacanth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yusuke
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-29 00:00:00.000000000 Z
11
+ date: 2024-10-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  coelacanth is a gem that allows you to easily parse and analyze web pages,
@@ -21,7 +21,6 @@ extra_rdoc_files: []
21
21
  files:
22
22
  - ".rspec"
23
23
  - ".rubocop.yml"
24
- - CHANGELOG.md
25
24
  - CODE_OF_CONDUCT.md
26
25
  - Dockerfile
27
26
  - Gemfile
@@ -34,6 +33,9 @@ files:
34
33
  - lib/coelacanth.rb
35
34
  - lib/coelacanth/client.rb
36
35
  - lib/coelacanth/configure.rb
36
+ - lib/coelacanth/dom.rb
37
+ - lib/coelacanth/redirect.rb
38
+ - lib/coelacanth/validator.rb
37
39
  - lib/coelacanth/version.rb
38
40
  homepage: https://github.com/slidict/coelacanth
39
41
  licenses:
data/CHANGELOG.md DELETED
@@ -1,5 +0,0 @@
1
- ## [Unreleased]
2
-
3
- ## [0.1.0] - 2024-08-24
4
-
5
- - Initial release