coelacanth 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 29b00f207c71fb604269ddb59b70064509dce9a5abc189c5ba98a17fc3701815
4
- data.tar.gz: fe1487e4f8b15ebbdc6312dce95d2e50b3338871fedb1cb5c49ec8098d97951c
3
+ metadata.gz: 320be95cd3ffe6b905abacda5aa1a7fad00324bd79eafbe9969c3f80ed2695b5
4
+ data.tar.gz: f53305c6f1a07080ce16412ad4d1eb272691f27f50ccb468ee6f8bcb9bf69382
5
5
  SHA512:
6
- metadata.gz: 462bbd1ee309ca534b42bdac616c4145f7fc3320971d391c025d7bd9080850b61fb773f8a84d5286e73321c90732f897c9b1760c68c6542d5bd3abdfba5b23d9
7
- data.tar.gz: ec1071051319687f27e989871cc4cc78a90fbbf2e07c52310e7150fa651efd908a3a2cde8424ed7abfe03755afca3661a6b64477e37be8f7b81d8427caf02f07
6
+ metadata.gz: 3201b30bcde133d5edc963d2522627082783df3565bff9644c8a9c33e2ade577cc8f3ca80b97d6a47ccfa9fc9734a768b94842386883604dd60b69176cc636c0
7
+ data.tar.gz: 9faaed9ade6c3e6258b7480fe6b2007a715f6b338413b09079b8e523823c4392ee82ec8fe5570dd71236d126c75b7e59fa0161380c5ea15c5df8a62f3d697c74
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- coelacanth (0.1.5)
4
+ coelacanth (0.2.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -31,7 +31,8 @@ GEM
31
31
  rainbow (3.1.1)
32
32
  rake (13.1.0)
33
33
  regexp_parser (2.9.0)
34
- rexml (3.2.6)
34
+ rexml (3.3.6)
35
+ strscan
35
36
  rspec (3.10.0)
36
37
  rspec-core (~> 3.10.0)
37
38
  rspec-expectations (~> 3.10.0)
@@ -62,6 +63,7 @@ GEM
62
63
  ansi
63
64
  ast
64
65
  ruby-progressbar (1.13.0)
66
+ strscan (3.1.0)
65
67
  unicode-display_width (2.5.0)
66
68
  webrick (1.8.1)
67
69
  websocket-driver (0.7.6)
data/README.md CHANGED
@@ -50,11 +50,12 @@ Then, you can easily parse and extract information from a web page like this:
50
50
 
51
51
  ```ruby
52
52
  url = "https://example.com"
53
- stats = Coelacanth.analyze(url) # Not yet implement
53
+ stats = Coelacanth.analyze(url)
54
54
  ```
55
55
 
56
56
  ## Features
57
- - More features coming soon!
57
+ - Get dom by oga
58
+ - Get screenshot
58
59
 
59
60
  ## Commit Message Guidelines
60
61
 
data/compose.yml CHANGED
@@ -18,6 +18,5 @@ services:
18
18
  - app-tier
19
19
  chrome:
20
20
  image: browserless/chrome:latest
21
- container_name: chrome
22
21
  networks:
23
22
  - app-tier
@@ -1,5 +1,4 @@
1
1
  development: &development
2
- use_remote_client: false
3
2
  remote_client:
4
3
  ws_url: "ws://chrome:3000/chrome"
5
4
  timeout: 10 # seconds
data/lib/coelacanth/client.rb CHANGED
@@ -1,76 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "ferrum"
4
- require "oga"
5
4
 
6
5
  module Coelacanth
7
6
  # Coelacanth::Client
8
7
  class Client
9
- def initialize(url = nil)
8
+ def initialize(url)
9
+ @validator = Validator.new
10
+ raise URI::InvalidURIError unless @validator.valid_url?(url)
10
11
  @config = Coelacanth.config
11
- @url = url if url && valid_url?(url)
12
- end
13
-
14
- def valid_url?(url = nil)
15
- @url = url if url
16
- uri = URI.parse(@url)
17
- uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
18
- rescue URI::InvalidURIError
19
- false
20
- end
21
-
22
- def resolve_redirect(url = nil, limit = 10)
23
- @url = url if url && valid_url?(url)
24
- raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
25
- raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
26
-
27
- get_response(@url)
28
- handle_response(@origin_response, limit)
29
- end
30
-
31
- def oga(url = nil)
32
- @url = url if url && valid_url?(url)
33
- Oga.parse_xml(get_response(@url))
12
+ remote_client.goto(url)
34
13
  end
35
14
 
36
15
  def get_response(url = nil)
37
- @url = url if url && valid_url?(url)
38
- if @config.read("use_remote_client")
39
- response_by_remote_client
40
- else
41
- response_by_net_http
42
- end
43
- end
44
-
45
- private
46
-
47
- def handle_response(response, limit)
48
- codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
49
- case @status_code.to_s
50
- when /^#{codes[Net::HTTPSuccess]}\d\d$/
51
- @url
52
- when /^#{codes[Net::HTTPRedirection]}\d\d$/
53
- @url = response["location"]
54
- resolve_redirect(response["location"], limit - 1)
55
- else
56
- raise Coelacanth::RedirectError
57
- end
58
- end
59
-
60
- def response_by_remote_client
61
- remote_client.goto(@url)
62
16
  @status_code = remote_client.network.status
63
17
  @origin_response = remote_client
64
18
  remote_client.body
65
19
  end
66
20
 
67
- def response_by_net_http
68
- response = Net::HTTP.get_response(URI.parse(@url))
69
- @status_code = response.code
70
- @origin_response = response
71
- response.body
21
+ def get_screenshot
22
+ tempfile = Tempfile.new
23
+ remote_client.screenshot(path: tempfile.path, format: "png")
24
+ File.read(tempfile.path)
72
25
  end
73
26
 
27
+ private
28
+
74
29
  def remote_client
75
30
  if @remote_client.nil?
76
31
  headers = @config.read("remote_client.headers")
data/lib/coelacanth/dom.rb ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "oga"
4
+
5
+ module Coelacanth
6
+ # Coelacanth::Dom
7
+ class Dom
8
+ def oga(url)
9
+ Oga.parse_xml(Client.new(url).get_response)
10
+ end
11
+ end
12
+ end
data/lib/coelacanth/redirect.rb ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+ require "oga"
5
+
6
+ module Coelacanth
7
+ # Coelacanth::Redirect
8
+ class Redirect
9
+ def resolve_redirect(url, limit = 10)
10
+ @url = url if url && Validator.new.valid_url?(url)
11
+ raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
12
+ raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
13
+
14
+ response = Net::HTTP.get_response(URI.parse(@url))
15
+ @status_code = response.code
16
+ @origin_response = response
17
+
18
+ handle_response(@origin_response, limit)
19
+ end
20
+
21
+ private
22
+
23
+ def handle_response(response, limit)
24
+ codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
25
+ case @status_code.to_s
26
+ when /^#{codes[Net::HTTPSuccess]}\d\d$/
27
+ @url
28
+ when /^#{codes[Net::HTTPRedirection]}\d\d$/
29
+ @url = response["location"]
30
+ resolve_redirect(response["location"], limit - 1)
31
+ else
32
+ raise Coelacanth::RedirectError
33
+ end
34
+ end
35
+ end
36
+ end
data/lib/coelacanth/validator.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+
5
+ module Coelacanth
6
+ # Coelacanth::Validator
7
+ class Validator
8
+ def valid_url?(url)
9
+ uri = URI.parse(url)
10
+ uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
11
+ rescue URI::InvalidURIError
12
+ false
13
+ end
14
+ end
15
+ end
data/lib/coelacanth/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coelacanth
4
- VERSION = "0.1.5"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/coelacanth.rb CHANGED
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "net/http"
4
- require_relative "coelacanth/version"
5
4
  require_relative "coelacanth/configure"
6
5
  require_relative "coelacanth/client"
6
+ require_relative "coelacanth/dom"
7
+ require_relative "coelacanth/redirect"
8
+ require_relative "coelacanth/validator"
9
+ require_relative "coelacanth/version"
7
10
 
8
11
  # Coelacanth
9
12
  module Coelacanth
@@ -13,10 +16,10 @@ module Coelacanth
13
16
 
14
17
  def self.analyze(url)
15
18
  @client = Client.new(url)
16
- @client.resolve_redirect
19
+ regular_url = Redirect.new.resolve_redirect(url)
17
20
  {
18
- remote_client: @config.read("use_remote_client"),
19
- oga: @client.oga
21
+ dom: Dom.new.oga(regular_url),
22
+ screenshot: @client.get_screenshot,
20
23
  }
21
24
  end
22
25
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coelacanth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yusuke
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-29 00:00:00.000000000 Z
11
+ date: 2024-09-12 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  coelacanth is a gem that allows you to easily parse and analyze web pages,
@@ -21,7 +21,6 @@ extra_rdoc_files: []
21
21
  files:
22
22
  - ".rspec"
23
23
  - ".rubocop.yml"
24
- - CHANGELOG.md
25
24
  - CODE_OF_CONDUCT.md
26
25
  - Dockerfile
27
26
  - Gemfile
@@ -34,6 +33,9 @@ files:
34
33
  - lib/coelacanth.rb
35
34
  - lib/coelacanth/client.rb
36
35
  - lib/coelacanth/configure.rb
36
+ - lib/coelacanth/dom.rb
37
+ - lib/coelacanth/redirect.rb
38
+ - lib/coelacanth/validator.rb
37
39
  - lib/coelacanth/version.rb
38
40
  homepage: https://github.com/slidict/coelacanth
39
41
  licenses:
data/CHANGELOG.md DELETED
@@ -1,5 +0,0 @@
1
- ## [Unreleased]
2
-
3
- ## [0.1.0] - 2024-08-24
4
-
5
- - Initial release