coelacanth 0.1.6 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0172679388ce0d8718122988de86589b623e0ab432f9e08b589dd22e2009f74a'
4
- data.tar.gz: baa0edff8a94564e33cc8596e44cfe49a040dd920798a10bbca824c2ce4d1f89
3
+ metadata.gz: 320be95cd3ffe6b905abacda5aa1a7fad00324bd79eafbe9969c3f80ed2695b5
4
+ data.tar.gz: f53305c6f1a07080ce16412ad4d1eb272691f27f50ccb468ee6f8bcb9bf69382
5
5
  SHA512:
6
- metadata.gz: 16564c8664acf81dddb9188f828ab7e59da5969c610302e64325e6233f928a908e404332b244c48f3b9e0a7e984d88d3713ca885600cd1cd9a6e3057e273630a
7
- data.tar.gz: 543fff73d944bf4b328e0bd3cebf975b9aaa093e2553d36c61ef09cefd1cc76ac997387cebba43580f059d497899006107a6bb0473afc2524f10423fa3cfad54
6
+ metadata.gz: 3201b30bcde133d5edc963d2522627082783df3565bff9644c8a9c33e2ade577cc8f3ca80b97d6a47ccfa9fc9734a768b94842386883604dd60b69176cc636c0
7
+ data.tar.gz: 9faaed9ade6c3e6258b7480fe6b2007a715f6b338413b09079b8e523823c4392ee82ec8fe5570dd71236d126c75b7e59fa0161380c5ea15c5df8a62f3d697c74
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- coelacanth (0.1.6)
4
+ coelacanth (0.2.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -31,7 +31,8 @@ GEM
31
31
  rainbow (3.1.1)
32
32
  rake (13.1.0)
33
33
  regexp_parser (2.9.0)
34
- rexml (3.2.6)
34
+ rexml (3.3.6)
35
+ strscan
35
36
  rspec (3.10.0)
36
37
  rspec-core (~> 3.10.0)
37
38
  rspec-expectations (~> 3.10.0)
@@ -62,6 +63,7 @@ GEM
62
63
  ansi
63
64
  ast
64
65
  ruby-progressbar (1.13.0)
66
+ strscan (3.1.0)
65
67
  unicode-display_width (2.5.0)
66
68
  webrick (1.8.1)
67
69
  websocket-driver (0.7.6)
data/README.md CHANGED
@@ -50,11 +50,12 @@ Then, you can easily parse and extract information from a web page like this:
50
50
 
51
51
  ```ruby
52
52
  url = "https://example.com"
53
- stats = Coelacanth.analyze(url) # Not yet implement
53
+ stats = Coelacanth.analyze(url)
54
54
  ```
55
55
 
56
56
  ## Features
57
- - More features coming soon!
57
+ - Get dom by oga
58
+ - Get screenshot
58
59
 
59
60
  ## Commit Message Guidelines
60
61
 
data/compose.yml CHANGED
@@ -18,6 +18,5 @@ services:
18
18
  - app-tier
19
19
  chrome:
20
20
  image: browserless/chrome:latest
21
- container_name: chrome
22
21
  networks:
23
22
  - app-tier
@@ -1,5 +1,4 @@
1
1
  development: &development
2
- use_remote_client: false
3
2
  remote_client:
4
3
  ws_url: "ws://chrome:3000/chrome"
5
4
  timeout: 10 # seconds
data/lib/coelacanth/client.rb CHANGED
@@ -1,76 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "ferrum"
4
- require "oga"
5
4
 
6
5
  module Coelacanth
7
6
  # Coelacanth::Client
8
7
  class Client
9
- def initialize(url = nil)
8
+ def initialize(url)
9
+ @validator = Validator.new
10
+ raise URI::InvalidURIError unless @validator.valid_url?(url)
10
11
  @config = Coelacanth.config
11
- @url = url if url && valid_url?(url)
12
- end
13
-
14
- def valid_url?(url = nil)
15
- @url = url if url
16
- uri = URI.parse(@url)
17
- uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
18
- rescue URI::InvalidURIError
19
- false
20
- end
21
-
22
- def resolve_redirect(url = nil, limit = 10)
23
- @url = url if url && valid_url?(url)
24
- raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
25
- raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
26
-
27
- get_response(@url)
28
- handle_response(@origin_response, limit)
29
- end
30
-
31
- def oga(url = nil)
32
- @url = url if url && valid_url?(url)
33
- Oga.parse_xml(get_response(@url))
12
+ remote_client.goto(url)
34
13
  end
35
14
 
36
15
  def get_response(url = nil)
37
- @url = url if url && valid_url?(url)
38
- if @config.read("use_remote_client")
39
- response_by_remote_client
40
- else
41
- response_by_net_http
42
- end
43
- end
44
-
45
- private
46
-
47
- def handle_response(response, limit)
48
- codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
49
- case @status_code.to_s
50
- when /^#{codes[Net::HTTPSuccess]}\d\d$/
51
- @url
52
- when /^#{codes[Net::HTTPRedirection]}\d\d$/
53
- @url = response["location"]
54
- resolve_redirect(response["location"], limit - 1)
55
- else
56
- raise Coelacanth::RedirectError
57
- end
58
- end
59
-
60
- def response_by_remote_client
61
- remote_client.goto(@url)
62
16
  @status_code = remote_client.network.status
63
17
  @origin_response = remote_client
64
18
  remote_client.body
65
19
  end
66
20
 
67
- def response_by_net_http
68
- response = Net::HTTP.get_response(URI.parse(@url))
69
- @status_code = response.code
70
- @origin_response = response
71
- response.body
21
+ def get_screenshot
22
+ tempfile = Tempfile.new
23
+ remote_client.screenshot(path: tempfile.path, format: "png")
24
+ File.read(tempfile.path)
72
25
  end
73
26
 
27
+ private
28
+
74
29
  def remote_client
75
30
  if @remote_client.nil?
76
31
  headers = @config.read("remote_client.headers")
data/lib/coelacanth/dom.rb ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "oga"
4
+
5
+ module Coelacanth
6
+ # Coelacanth::Dom
7
+ class Dom
8
+ def oga(url)
9
+ Oga.parse_xml(Client.new(url).get_response)
10
+ end
11
+ end
12
+ end
data/lib/coelacanth/redirect.rb ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+ require "oga"
5
+
6
+ module Coelacanth
7
+ # Coelacanth::Redirect
8
+ class Redirect
9
+ def resolve_redirect(url, limit = 10)
10
+ @url = url if url && Validator.new.valid_url?(url)
11
+ raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
12
+ raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
13
+
14
+ response = Net::HTTP.get_response(URI.parse(@url))
15
+ @status_code = response.code
16
+ @origin_response = response
17
+
18
+ handle_response(@origin_response, limit)
19
+ end
20
+
21
+ private
22
+
23
+ def handle_response(response, limit)
24
+ codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
25
+ case @status_code.to_s
26
+ when /^#{codes[Net::HTTPSuccess]}\d\d$/
27
+ @url
28
+ when /^#{codes[Net::HTTPRedirection]}\d\d$/
29
+ @url = response["location"]
30
+ resolve_redirect(response["location"], limit - 1)
31
+ else
32
+ raise Coelacanth::RedirectError
33
+ end
34
+ end
35
+ end
36
+ end
data/lib/coelacanth/validator.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+
5
+ module Coelacanth
6
+ # Coelacanth::Validator
7
+ class Validator
8
+ def valid_url?(url)
9
+ uri = URI.parse(url)
10
+ uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
11
+ rescue URI::InvalidURIError
12
+ false
13
+ end
14
+ end
15
+ end
data/lib/coelacanth/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coelacanth
4
- VERSION = "0.1.6"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/coelacanth.rb CHANGED
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "net/http"
4
- require_relative "coelacanth/version"
5
4
  require_relative "coelacanth/configure"
6
5
  require_relative "coelacanth/client"
6
+ require_relative "coelacanth/dom"
7
+ require_relative "coelacanth/redirect"
8
+ require_relative "coelacanth/validator"
9
+ require_relative "coelacanth/version"
7
10
 
8
11
  # Coelacanth
9
12
  module Coelacanth
@@ -13,10 +16,10 @@ module Coelacanth
13
16
 
14
17
  def self.analyze(url)
15
18
  @client = Client.new(url)
16
- @client.resolve_redirect
19
+ regular_url = Redirect.new.resolve_redirect(url)
17
20
  {
18
- remote_client: @config.read("use_remote_client"),
19
- oga: @client.oga
21
+ dom: Dom.new.oga(regular_url),
22
+ screenshot: @client.get_screenshot,
20
23
  }
21
24
  end
22
25
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coelacanth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yusuke
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-29 00:00:00.000000000 Z
11
+ date: 2024-09-12 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  coelacanth is a gem that allows you to easily parse and analyze web pages,
@@ -21,7 +21,6 @@ extra_rdoc_files: []
21
21
  files:
22
22
  - ".rspec"
23
23
  - ".rubocop.yml"
24
- - CHANGELOG.md
25
24
  - CODE_OF_CONDUCT.md
26
25
  - Dockerfile
27
26
  - Gemfile
@@ -34,6 +33,9 @@ files:
34
33
  - lib/coelacanth.rb
35
34
  - lib/coelacanth/client.rb
36
35
  - lib/coelacanth/configure.rb
36
+ - lib/coelacanth/dom.rb
37
+ - lib/coelacanth/redirect.rb
38
+ - lib/coelacanth/validator.rb
37
39
  - lib/coelacanth/version.rb
38
40
  homepage: https://github.com/slidict/coelacanth
39
41
  licenses:
data/CHANGELOG.md DELETED
@@ -1,5 +0,0 @@
1
- ## [Unreleased]
2
-
3
- ## [0.1.0] - 2024-08-24
4
-
5
- - Initial release