coelacanth 0.1.6 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0172679388ce0d8718122988de86589b623e0ab432f9e08b589dd22e2009f74a'
4
- data.tar.gz: baa0edff8a94564e33cc8596e44cfe49a040dd920798a10bbca824c2ce4d1f89
3
+ metadata.gz: 506450022b367ef2795c9d67d66548b1893757dd740512286fb90f9b952f21d1
4
+ data.tar.gz: df860c4216846549ac335b15dbef4cd9f99aa068a464a47b58ba973ba9d3b771
5
5
  SHA512:
6
- metadata.gz: 16564c8664acf81dddb9188f828ab7e59da5969c610302e64325e6233f928a908e404332b244c48f3b9e0a7e984d88d3713ca885600cd1cd9a6e3057e273630a
7
- data.tar.gz: 543fff73d944bf4b328e0bd3cebf975b9aaa093e2553d36c61ef09cefd1cc76ac997387cebba43580f059d497899006107a6bb0473afc2524f10423fa3cfad54
6
+ metadata.gz: f4f483791381bf5672c74c382c73129703983c936720184cdd870db211cb740bb5ff6685460c6152213eea8dfcce85c6666ffa189d23e3b58e92c183658b486e
7
+ data.tar.gz: 577737cc29a81134a288bb3bee66a220d46f80749a9824ff5b487b62dfe97a8f1209124266715c58d9021e7e88d47e634f675462e9d27e62bd8e3e93c532e31d
data/Gemfile CHANGED
@@ -6,7 +6,7 @@ source "https://rubygems.org"
6
6
  gemspec
7
7
 
8
8
  gem "ferrum", "~> 0.15"
9
- gem "rake", "~> 13.0"
9
+ gem "rake", "~> 13.2"
10
10
  gem "rspec", "~> 3.0"
11
11
  gem "rubocop", "~> 1.21"
12
12
  gem "oga", "~> 3.4"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- coelacanth (0.1.6)
4
+ coelacanth (0.2.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -11,59 +11,57 @@ GEM
11
11
  ansi (1.5.0)
12
12
  ast (2.4.2)
13
13
  concurrent-ruby (1.3.4)
14
- diff-lcs (1.4.4)
14
+ diff-lcs (1.5.1)
15
15
  ferrum (0.15)
16
16
  addressable (~> 2.5)
17
17
  concurrent-ruby (~> 1.1)
18
18
  webrick (~> 1.7)
19
19
  websocket-driver (~> 0.7)
20
- json (2.7.1)
20
+ json (2.7.2)
21
21
  language_server-protocol (3.17.0.3)
22
22
  oga (3.4)
23
23
  ast
24
24
  ruby-ll (~> 2.1)
25
- parallel (1.24.0)
26
- parser (3.3.0.5)
25
+ parallel (1.26.3)
26
+ parser (3.3.5.0)
27
27
  ast (~> 2.4.1)
28
28
  racc
29
29
  public_suffix (6.0.1)
30
- racc (1.7.3)
30
+ racc (1.8.1)
31
31
  rainbow (3.1.1)
32
- rake (13.1.0)
33
- regexp_parser (2.9.0)
34
- rexml (3.2.6)
35
- rspec (3.10.0)
36
- rspec-core (~> 3.10.0)
37
- rspec-expectations (~> 3.10.0)
38
- rspec-mocks (~> 3.10.0)
39
- rspec-core (3.10.1)
40
- rspec-support (~> 3.10.0)
41
- rspec-expectations (3.10.1)
32
+ rake (13.2.1)
33
+ regexp_parser (2.9.2)
34
+ rspec (3.13.0)
35
+ rspec-core (~> 3.13.0)
36
+ rspec-expectations (~> 3.13.0)
37
+ rspec-mocks (~> 3.13.0)
38
+ rspec-core (3.13.2)
39
+ rspec-support (~> 3.13.0)
40
+ rspec-expectations (3.13.3)
42
41
  diff-lcs (>= 1.2.0, < 2.0)
43
- rspec-support (~> 3.10.0)
44
- rspec-mocks (3.10.2)
42
+ rspec-support (~> 3.13.0)
43
+ rspec-mocks (3.13.2)
45
44
  diff-lcs (>= 1.2.0, < 2.0)
46
- rspec-support (~> 3.10.0)
47
- rspec-support (3.10.2)
48
- rubocop (1.62.1)
45
+ rspec-support (~> 3.13.0)
46
+ rspec-support (3.13.1)
47
+ rubocop (1.67.0)
49
48
  json (~> 2.3)
50
49
  language_server-protocol (>= 3.17.0)
51
50
  parallel (~> 1.10)
52
51
  parser (>= 3.3.0.2)
53
52
  rainbow (>= 2.2.2, < 4.0)
54
- regexp_parser (>= 1.8, < 3.0)
55
- rexml (>= 3.2.5, < 4.0)
56
- rubocop-ast (>= 1.31.1, < 2.0)
53
+ regexp_parser (>= 2.4, < 3.0)
54
+ rubocop-ast (>= 1.32.2, < 2.0)
57
55
  ruby-progressbar (~> 1.7)
58
56
  unicode-display_width (>= 2.4.0, < 3.0)
59
- rubocop-ast (1.31.2)
60
- parser (>= 3.3.0.4)
57
+ rubocop-ast (1.32.3)
58
+ parser (>= 3.3.1.0)
61
59
  ruby-ll (2.1.3)
62
60
  ansi
63
61
  ast
64
62
  ruby-progressbar (1.13.0)
65
- unicode-display_width (2.5.0)
66
- webrick (1.8.1)
63
+ unicode-display_width (2.6.0)
64
+ webrick (1.8.2)
67
65
  websocket-driver (0.7.6)
68
66
  websocket-extensions (>= 0.1.0)
69
67
  websocket-extensions (0.1.5)
@@ -76,9 +74,9 @@ DEPENDENCIES
76
74
  coelacanth!
77
75
  ferrum (~> 0.15)
78
76
  oga (~> 3.4)
79
- rake (~> 13.0)
77
+ rake (~> 13.2)
80
78
  rspec (~> 3.0)
81
79
  rubocop (~> 1.21)
82
80
 
83
81
  BUNDLED WITH
84
- 2.5.7
82
+ 2.5.17
data/README.md CHANGED
@@ -50,11 +50,12 @@ Then, you can easily parse and extract information from a web page like this:
50
50
 
51
51
  ```ruby
52
52
  url = "https://example.com"
53
- stats = Coelacanth.analyze(url) # Not yet implement
53
+ stats = Coelacanth.analyze(url)
54
54
  ```
55
55
 
56
56
  ## Features
57
- - More features coming soon!
57
+ - Get dom by oga
58
+ - Get screenshot
58
59
 
59
60
  ## Commit Message Guidelines
60
61
 
data/compose.yml CHANGED
@@ -18,6 +18,5 @@ services:
18
18
  - app-tier
19
19
  chrome:
20
20
  image: browserless/chrome:latest
21
- container_name: chrome
22
21
  networks:
23
22
  - app-tier
@@ -1,5 +1,4 @@
1
1
  development: &development
2
- use_remote_client: false
3
2
  remote_client:
4
3
  ws_url: "ws://chrome:3000/chrome"
5
4
  timeout: 10 # seconds
@@ -1,76 +1,34 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "ferrum"
4
- require "oga"
5
4
 
6
5
  module Coelacanth
7
6
  # Coelacanth::Client
8
7
  class Client
9
- def initialize(url = nil)
8
+ def initialize(url)
9
+ @validator = Validator.new
10
+ raise URI::InvalidURIError unless @validator.valid_url?(url)
10
11
  @config = Coelacanth.config
11
- @url = url if url && valid_url?(url)
12
- end
13
-
14
- def valid_url?(url = nil)
15
- @url = url if url
16
- uri = URI.parse(@url)
17
- uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
18
- rescue URI::InvalidURIError
19
- false
20
- end
21
-
22
- def resolve_redirect(url = nil, limit = 10)
23
- @url = url if url && valid_url?(url)
24
- raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
25
- raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
26
-
27
- get_response(@url)
28
- handle_response(@origin_response, limit)
29
- end
30
-
31
- def oga(url = nil)
32
- @url = url if url && valid_url?(url)
33
- Oga.parse_xml(get_response(@url))
12
+ remote_client.goto(url)
34
13
  end
35
14
 
36
15
  def get_response(url = nil)
37
- @url = url if url && valid_url?(url)
38
- if @config.read("use_remote_client")
39
- response_by_remote_client
40
- else
41
- response_by_net_http
42
- end
43
- end
44
-
45
- private
46
-
47
- def handle_response(response, limit)
48
- codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
49
- case @status_code.to_s
50
- when /^#{codes[Net::HTTPSuccess]}\d\d$/
51
- @url
52
- when /^#{codes[Net::HTTPRedirection]}\d\d$/
53
- @url = response["location"]
54
- resolve_redirect(response["location"], limit - 1)
55
- else
56
- raise Coelacanth::RedirectError
57
- end
58
- end
59
-
60
- def response_by_remote_client
61
- remote_client.goto(@url)
62
16
  @status_code = remote_client.network.status
63
17
  @origin_response = remote_client
64
- remote_client.body
18
+ body = remote_client.body
19
+ page.network.wait_for_idle! # might raise an error
20
+ body
65
21
  end
66
22
 
67
- def response_by_net_http
68
- response = Net::HTTP.get_response(URI.parse(@url))
69
- @status_code = response.code
70
- @origin_response = response
71
- response.body
23
+ def get_screenshot
24
+ tempfile = Tempfile.new
25
+ remote_client.screenshot(path: tempfile.path, format: "png")
26
+ page.network.wait_for_idle! # might raise an error
27
+ File.read(tempfile.path)
72
28
  end
73
29
 
30
+ private
31
+
74
32
  def remote_client
75
33
  if @remote_client.nil?
76
34
  headers = @config.read("remote_client.headers")
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "oga"
4
+
5
+ module Coelacanth
6
+ # Coelacanth::Dom
7
+ class Dom
8
+ def oga(url)
9
+ Oga.parse_xml(Client.new(url).get_response)
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+ require "oga"
5
+
6
+ module Coelacanth
7
+ # Coelacanth::Redirect
8
+ class Redirect
9
+ def resolve_redirect(url, limit = 10)
10
+ @url = url if url && Validator.new.valid_url?(url)
11
+ raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
12
+ raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
13
+
14
+ response = Net::HTTP.get_response(URI.parse(@url))
15
+ @status_code = response.code
16
+ @origin_response = response
17
+
18
+ handle_response(@origin_response, limit)
19
+ end
20
+
21
+ private
22
+
23
+ def handle_response(response, limit)
24
+ codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
25
+ case @status_code.to_s
26
+ when /^#{codes[Net::HTTPSuccess]}\d\d$/
27
+ @url
28
+ when /^#{codes[Net::HTTPRedirection]}\d\d$/
29
+ @url = response["location"]
30
+ resolve_redirect(response["location"], limit - 1)
31
+ else
32
+ raise Coelacanth::RedirectError
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+
5
+ module Coelacanth
6
+ # Coelacanth::Validator
7
+ class Validator
8
+ def valid_url?(url)
9
+ uri = URI.parse(url)
10
+ uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
11
+ rescue URI::InvalidURIError
12
+ false
13
+ end
14
+ end
15
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coelacanth
4
- VERSION = "0.1.6"
4
+ VERSION = "0.2.2"
5
5
  end
data/lib/coelacanth.rb CHANGED
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "net/http"
4
- require_relative "coelacanth/version"
5
4
  require_relative "coelacanth/configure"
6
5
  require_relative "coelacanth/client"
6
+ require_relative "coelacanth/dom"
7
+ require_relative "coelacanth/redirect"
8
+ require_relative "coelacanth/validator"
9
+ require_relative "coelacanth/version"
7
10
 
8
11
  # Coelacanth
9
12
  module Coelacanth
@@ -13,10 +16,10 @@ module Coelacanth
13
16
 
14
17
  def self.analyze(url)
15
18
  @client = Client.new(url)
16
- @client.resolve_redirect
19
+ regular_url = Redirect.new.resolve_redirect(url)
17
20
  {
18
- remote_client: @config.read("use_remote_client"),
19
- oga: @client.oga
21
+ dom: Dom.new.oga(regular_url),
22
+ screenshot: @client.get_screenshot,
20
23
  }
21
24
  end
22
25
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coelacanth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yusuke
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-29 00:00:00.000000000 Z
11
+ date: 2024-10-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  coelacanth is a gem that allows you to easily parse and analyze web pages,
@@ -21,7 +21,6 @@ extra_rdoc_files: []
21
21
  files:
22
22
  - ".rspec"
23
23
  - ".rubocop.yml"
24
- - CHANGELOG.md
25
24
  - CODE_OF_CONDUCT.md
26
25
  - Dockerfile
27
26
  - Gemfile
@@ -34,6 +33,9 @@ files:
34
33
  - lib/coelacanth.rb
35
34
  - lib/coelacanth/client.rb
36
35
  - lib/coelacanth/configure.rb
36
+ - lib/coelacanth/dom.rb
37
+ - lib/coelacanth/redirect.rb
38
+ - lib/coelacanth/validator.rb
37
39
  - lib/coelacanth/version.rb
38
40
  homepage: https://github.com/slidict/coelacanth
39
41
  licenses:
data/CHANGELOG.md DELETED
@@ -1,5 +0,0 @@
1
- ## [Unreleased]
2
-
3
- ## [0.1.0] - 2024-08-24
4
-
5
- - Initial release