coelacanth 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -2
- data/README.md +3 -2
- data/compose.yml +0 -1
- data/config/coelacanth.yml +0 -1
- data/lib/coelacanth/client.rb +10 -55
- data/lib/coelacanth/dom.rb +12 -0
- data/lib/coelacanth/redirect.rb +36 -0
- data/lib/coelacanth/validator.rb +15 -0
- data/lib/coelacanth/version.rb +1 -1
- data/lib/coelacanth.rb +7 -4
- metadata +5 -3
- data/CHANGELOG.md +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 320be95cd3ffe6b905abacda5aa1a7fad00324bd79eafbe9969c3f80ed2695b5
|
4
|
+
data.tar.gz: f53305c6f1a07080ce16412ad4d1eb272691f27f50ccb468ee6f8bcb9bf69382
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3201b30bcde133d5edc963d2522627082783df3565bff9644c8a9c33e2ade577cc8f3ca80b97d6a47ccfa9fc9734a768b94842386883604dd60b69176cc636c0
|
7
|
+
data.tar.gz: 9faaed9ade6c3e6258b7480fe6b2007a715f6b338413b09079b8e523823c4392ee82ec8fe5570dd71236d126c75b7e59fa0161380c5ea15c5df8a62f3d697c74
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
coelacanth (0.
|
4
|
+
coelacanth (0.2.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -31,7 +31,8 @@ GEM
|
|
31
31
|
rainbow (3.1.1)
|
32
32
|
rake (13.1.0)
|
33
33
|
regexp_parser (2.9.0)
|
34
|
-
rexml (3.
|
34
|
+
rexml (3.3.6)
|
35
|
+
strscan
|
35
36
|
rspec (3.10.0)
|
36
37
|
rspec-core (~> 3.10.0)
|
37
38
|
rspec-expectations (~> 3.10.0)
|
@@ -62,6 +63,7 @@ GEM
|
|
62
63
|
ansi
|
63
64
|
ast
|
64
65
|
ruby-progressbar (1.13.0)
|
66
|
+
strscan (3.1.0)
|
65
67
|
unicode-display_width (2.5.0)
|
66
68
|
webrick (1.8.1)
|
67
69
|
websocket-driver (0.7.6)
|
data/README.md
CHANGED
@@ -50,11 +50,12 @@ Then, you can easily parse and extract information from a web page like this:
|
|
50
50
|
|
51
51
|
```ruby
|
52
52
|
url = "https://example.com"
|
53
|
-
stats = Coelacanth.analyze(url)
|
53
|
+
stats = Coelacanth.analyze(url)
|
54
54
|
```
|
55
55
|
|
56
56
|
## Features
|
57
|
-
-
|
57
|
+
- Get dom by oga
|
58
|
+
- Get screenshot
|
58
59
|
|
59
60
|
## Commit Message Guidelines
|
60
61
|
|
data/compose.yml
CHANGED
data/config/coelacanth.yml
CHANGED
data/lib/coelacanth/client.rb
CHANGED
@@ -1,76 +1,31 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "ferrum"
|
4
|
-
require "oga"
|
5
4
|
|
6
5
|
module Coelacanth
|
7
6
|
# Coelacanth::Client
|
8
7
|
class Client
|
9
|
-
def initialize(url
|
8
|
+
def initialize(url)
|
9
|
+
@validator = Validator.new
|
10
|
+
raise URI::InvalidURIError unless @validator.valid_url?(url)
|
10
11
|
@config = Coelacanth.config
|
11
|
-
|
12
|
-
end
|
13
|
-
|
14
|
-
def valid_url?(url = nil)
|
15
|
-
@url = url if url
|
16
|
-
uri = URI.parse(@url)
|
17
|
-
uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
18
|
-
rescue URI::InvalidURIError
|
19
|
-
false
|
20
|
-
end
|
21
|
-
|
22
|
-
def resolve_redirect(url = nil, limit = 10)
|
23
|
-
@url = url if url && valid_url?(url)
|
24
|
-
raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
|
25
|
-
raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
|
26
|
-
|
27
|
-
get_response(@url)
|
28
|
-
handle_response(@origin_response, limit)
|
29
|
-
end
|
30
|
-
|
31
|
-
def oga(url = nil)
|
32
|
-
@url = url if url && valid_url?(url)
|
33
|
-
Oga.parse_xml(get_response(@url))
|
12
|
+
remote_client.goto(url)
|
34
13
|
end
|
35
14
|
|
36
15
|
def get_response(url = nil)
|
37
|
-
@url = url if url && valid_url?(url)
|
38
|
-
if @config.read("use_remote_client")
|
39
|
-
response_by_remote_client
|
40
|
-
else
|
41
|
-
response_by_net_http
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
private
|
46
|
-
|
47
|
-
def handle_response(response, limit)
|
48
|
-
codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
|
49
|
-
case @status_code.to_s
|
50
|
-
when /^#{codes[Net::HTTPSuccess]}\d\d$/
|
51
|
-
@url
|
52
|
-
when /^#{codes[Net::HTTPRedirection]}\d\d$/
|
53
|
-
@url = response["location"]
|
54
|
-
resolve_redirect(response["location"], limit - 1)
|
55
|
-
else
|
56
|
-
raise Coelacanth::RedirectError
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def response_by_remote_client
|
61
|
-
remote_client.goto(@url)
|
62
16
|
@status_code = remote_client.network.status
|
63
17
|
@origin_response = remote_client
|
64
18
|
remote_client.body
|
65
19
|
end
|
66
20
|
|
67
|
-
def
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
response.body
|
21
|
+
def get_screenshot
|
22
|
+
tempfile = Tempfile.new
|
23
|
+
remote_client.screenshot(path: tempfile.path, format: "png")
|
24
|
+
File.read(tempfile.path)
|
72
25
|
end
|
73
26
|
|
27
|
+
private
|
28
|
+
|
74
29
|
def remote_client
|
75
30
|
if @remote_client.nil?
|
76
31
|
headers = @config.read("remote_client.headers")
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ferrum"
|
4
|
+
require "oga"
|
5
|
+
|
6
|
+
module Coelacanth
|
7
|
+
# Coelacanth::Redirect
|
8
|
+
class Redirect
|
9
|
+
def resolve_redirect(url, limit = 10)
|
10
|
+
@url = url if url && Validator.new.valid_url?(url)
|
11
|
+
raise Coelacanth::DeepRedirectError, "Too many redirect" if limit.zero?
|
12
|
+
raise Coelacanth::RedirectError, "Url or location is nil" if @url.nil?
|
13
|
+
|
14
|
+
response = Net::HTTP.get_response(URI.parse(@url))
|
15
|
+
@status_code = response.code
|
16
|
+
@origin_response = response
|
17
|
+
|
18
|
+
handle_response(@origin_response, limit)
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def handle_response(response, limit)
|
24
|
+
codes = Net::HTTPResponse::CODE_CLASS_TO_OBJ.invert
|
25
|
+
case @status_code.to_s
|
26
|
+
when /^#{codes[Net::HTTPSuccess]}\d\d$/
|
27
|
+
@url
|
28
|
+
when /^#{codes[Net::HTTPRedirection]}\d\d$/
|
29
|
+
@url = response["location"]
|
30
|
+
resolve_redirect(response["location"], limit - 1)
|
31
|
+
else
|
32
|
+
raise Coelacanth::RedirectError
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ferrum"
|
4
|
+
|
5
|
+
module Coelacanth
|
6
|
+
# Coelacanth::Validator
|
7
|
+
class Validator
|
8
|
+
def valid_url?(url)
|
9
|
+
uri = URI.parse(url)
|
10
|
+
uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
11
|
+
rescue URI::InvalidURIError
|
12
|
+
false
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/coelacanth/version.rb
CHANGED
data/lib/coelacanth.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require "net/http"
|
4
|
-
require_relative "coelacanth/version"
|
5
4
|
require_relative "coelacanth/configure"
|
6
5
|
require_relative "coelacanth/client"
|
6
|
+
require_relative "coelacanth/dom"
|
7
|
+
require_relative "coelacanth/redirect"
|
8
|
+
require_relative "coelacanth/validator"
|
9
|
+
require_relative "coelacanth/version"
|
7
10
|
|
8
11
|
# Coelacanth
|
9
12
|
module Coelacanth
|
@@ -13,10 +16,10 @@ module Coelacanth
|
|
13
16
|
|
14
17
|
def self.analyze(url)
|
15
18
|
@client = Client.new(url)
|
16
|
-
|
19
|
+
regular_url = Redirect.new.resolve_redirect(url)
|
17
20
|
{
|
18
|
-
|
19
|
-
|
21
|
+
dom: Dom.new.oga(regular_url),
|
22
|
+
screenshot: @client.get_screenshot,
|
20
23
|
}
|
21
24
|
end
|
22
25
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: coelacanth
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yusuke
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: |
|
14
14
|
coelacanth is a gem that allows you to easily parse and analyze web pages,
|
@@ -21,7 +21,6 @@ extra_rdoc_files: []
|
|
21
21
|
files:
|
22
22
|
- ".rspec"
|
23
23
|
- ".rubocop.yml"
|
24
|
-
- CHANGELOG.md
|
25
24
|
- CODE_OF_CONDUCT.md
|
26
25
|
- Dockerfile
|
27
26
|
- Gemfile
|
@@ -34,6 +33,9 @@ files:
|
|
34
33
|
- lib/coelacanth.rb
|
35
34
|
- lib/coelacanth/client.rb
|
36
35
|
- lib/coelacanth/configure.rb
|
36
|
+
- lib/coelacanth/dom.rb
|
37
|
+
- lib/coelacanth/redirect.rb
|
38
|
+
- lib/coelacanth/validator.rb
|
37
39
|
- lib/coelacanth/version.rb
|
38
40
|
homepage: https://github.com/slidict/coelacanth
|
39
41
|
licenses:
|