coelacanth 0.2.3 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -1
- data/Gemfile.lock +45 -24
- data/README.md +6 -0
- data/config/coelacanth.yml +3 -0
- data/lib/coelacanth/client/base.rb +27 -0
- data/lib/coelacanth/{client.rb → client/ferrum.rb} +4 -6
- data/lib/coelacanth/client/screenshot_one.rb +43 -0
- data/lib/coelacanth/dom.rb +1 -1
- data/lib/coelacanth/version.rb +1 -1
- data/lib/coelacanth.rb +5 -2
- metadata +6 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc0f98072fb81b09973a8acbee60d8f9b34c31cdb75e161f7d2f1e8f2bd9ee3c
|
4
|
+
data.tar.gz: fba2700643da179439dc2588717bb78b06fb6d9ef056c262ae792084e81ab4eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a7c6bd212ba78c390798d9b522f5f3f5927941a95eec56ec20e78c0d4744756cd8b3a5cc8eada99f8e5f4539309785d7b8dec1c6ab0454201450e700063016f
|
7
|
+
data.tar.gz: 7144aaf89b5ad33ebb8858e866c639c70da8aaffa9d028b7fe898d4fdf0c4d2cb65c4793d96af8bfa4cc2c595b1cb4559927dceda1368323b8d98cfb215363fb
|
data/Gemfile
CHANGED
@@ -5,8 +5,14 @@ source "https://rubygems.org"
|
|
5
5
|
# Specify your gem's dependencies in coelacanth.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
-
gem "ferrum", "~> 0.
|
8
|
+
gem "ferrum", "~> 0.16"
|
9
9
|
gem "rake", "~> 13.2"
|
10
10
|
gem "rspec", "~> 3.0"
|
11
11
|
gem "rubocop", "~> 1.21"
|
12
12
|
gem "oga", "~> 3.4"
|
13
|
+
gem "base64", "~> 0.2.0"
|
14
|
+
|
15
|
+
group :development, :test do
|
16
|
+
gem "webmock", "~> 3.25"
|
17
|
+
end
|
18
|
+
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
coelacanth (0.
|
4
|
+
coelacanth (0.3.1)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -9,33 +9,43 @@ GEM
|
|
9
9
|
addressable (2.8.7)
|
10
10
|
public_suffix (>= 2.0.2, < 7.0)
|
11
11
|
ansi (1.5.0)
|
12
|
-
ast (2.4.
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
ast (2.4.3)
|
13
|
+
base64 (0.2.0)
|
14
|
+
bigdecimal (3.1.9)
|
15
|
+
concurrent-ruby (1.3.5)
|
16
|
+
crack (1.0.0)
|
17
|
+
bigdecimal
|
18
|
+
rexml
|
19
|
+
diff-lcs (1.6.1)
|
20
|
+
ferrum (0.16)
|
16
21
|
addressable (~> 2.5)
|
22
|
+
base64 (~> 0.2)
|
17
23
|
concurrent-ruby (~> 1.1)
|
18
24
|
webrick (~> 1.7)
|
19
25
|
websocket-driver (~> 0.7)
|
20
|
-
|
21
|
-
|
26
|
+
hashdiff (1.1.2)
|
27
|
+
json (2.10.2)
|
28
|
+
language_server-protocol (3.17.0.4)
|
29
|
+
lint_roller (1.1.0)
|
22
30
|
oga (3.4)
|
23
31
|
ast
|
24
32
|
ruby-ll (~> 2.1)
|
25
|
-
parallel (1.
|
26
|
-
parser (3.3.
|
33
|
+
parallel (1.27.0)
|
34
|
+
parser (3.3.8.0)
|
27
35
|
ast (~> 2.4.1)
|
28
36
|
racc
|
37
|
+
prism (1.4.0)
|
29
38
|
public_suffix (6.0.1)
|
30
39
|
racc (1.8.1)
|
31
40
|
rainbow (3.1.1)
|
32
41
|
rake (13.2.1)
|
33
|
-
regexp_parser (2.
|
42
|
+
regexp_parser (2.10.0)
|
43
|
+
rexml (3.4.1)
|
34
44
|
rspec (3.13.0)
|
35
45
|
rspec-core (~> 3.13.0)
|
36
46
|
rspec-expectations (~> 3.13.0)
|
37
47
|
rspec-mocks (~> 3.13.0)
|
38
|
-
rspec-core (3.13.
|
48
|
+
rspec-core (3.13.3)
|
39
49
|
rspec-support (~> 3.13.0)
|
40
50
|
rspec-expectations (3.13.3)
|
41
51
|
diff-lcs (>= 1.2.0, < 2.0)
|
@@ -43,26 +53,35 @@ GEM
|
|
43
53
|
rspec-mocks (3.13.2)
|
44
54
|
diff-lcs (>= 1.2.0, < 2.0)
|
45
55
|
rspec-support (~> 3.13.0)
|
46
|
-
rspec-support (3.13.
|
47
|
-
rubocop (1.
|
56
|
+
rspec-support (3.13.2)
|
57
|
+
rubocop (1.75.3)
|
48
58
|
json (~> 2.3)
|
49
|
-
language_server-protocol (
|
59
|
+
language_server-protocol (~> 3.17.0.2)
|
60
|
+
lint_roller (~> 1.1.0)
|
50
61
|
parallel (~> 1.10)
|
51
62
|
parser (>= 3.3.0.2)
|
52
63
|
rainbow (>= 2.2.2, < 4.0)
|
53
|
-
regexp_parser (>= 2.
|
54
|
-
rubocop-ast (>= 1.
|
64
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
65
|
+
rubocop-ast (>= 1.44.0, < 2.0)
|
55
66
|
ruby-progressbar (~> 1.7)
|
56
|
-
unicode-display_width (>= 2.4.0, <
|
57
|
-
rubocop-ast (1.
|
58
|
-
parser (>= 3.3.
|
67
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
68
|
+
rubocop-ast (1.44.1)
|
69
|
+
parser (>= 3.3.7.2)
|
70
|
+
prism (~> 1.4)
|
59
71
|
ruby-ll (2.1.3)
|
60
72
|
ansi
|
61
73
|
ast
|
62
74
|
ruby-progressbar (1.13.0)
|
63
|
-
unicode-display_width (
|
64
|
-
|
65
|
-
|
75
|
+
unicode-display_width (3.1.4)
|
76
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
77
|
+
unicode-emoji (4.0.4)
|
78
|
+
webmock (3.25.1)
|
79
|
+
addressable (>= 2.8.0)
|
80
|
+
crack (>= 0.3.2)
|
81
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
82
|
+
webrick (1.9.1)
|
83
|
+
websocket-driver (0.7.7)
|
84
|
+
base64
|
66
85
|
websocket-extensions (>= 0.1.0)
|
67
86
|
websocket-extensions (0.1.5)
|
68
87
|
|
@@ -71,12 +90,14 @@ PLATFORMS
|
|
71
90
|
x86_64-linux
|
72
91
|
|
73
92
|
DEPENDENCIES
|
93
|
+
base64 (~> 0.2.0)
|
74
94
|
coelacanth!
|
75
|
-
ferrum (~> 0.
|
95
|
+
ferrum (~> 0.16)
|
76
96
|
oga (~> 3.4)
|
77
97
|
rake (~> 13.2)
|
78
98
|
rspec (~> 3.0)
|
79
99
|
rubocop (~> 1.21)
|
100
|
+
webmock (~> 3.25)
|
80
101
|
|
81
102
|
BUNDLED WITH
|
82
|
-
2.
|
103
|
+
2.6.7
|
data/README.md
CHANGED
data/config/coelacanth.yml
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
development: &development
|
2
|
+
client: "ferrum" # Options: "ferrum", "screenshot_one"
|
2
3
|
remote_client:
|
3
4
|
ws_url: "ws://chrome:3000/chrome"
|
4
5
|
timeout: 10 # seconds
|
5
6
|
headers:
|
6
7
|
Authorization: "Bearer 1234567890"
|
7
8
|
User-Agent: "Coelacanth Chrome Extension"
|
9
|
+
screenshot_one:
|
10
|
+
key: "your_screenshot_one_api_key_here"
|
8
11
|
test:
|
9
12
|
<<: *development
|
10
13
|
production:
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ferrum"
|
4
|
+
|
5
|
+
module Coelacanth::Client
|
6
|
+
# Coelacanth::Client
|
7
|
+
class Base
|
8
|
+
def initialize(url, config = Coelacanth.config)
|
9
|
+
@validator = Coelacanth::Validator.new
|
10
|
+
raise URI::InvalidURIError unless @validator.valid_url?(url)
|
11
|
+
@config = config
|
12
|
+
@url = url
|
13
|
+
end
|
14
|
+
|
15
|
+
def client
|
16
|
+
@config.read("client")
|
17
|
+
end
|
18
|
+
|
19
|
+
def get_response(url = nil)
|
20
|
+
raise "Must be implemented in subclass"
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_screenshot
|
24
|
+
raise "Must be implemented in subclass"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -2,13 +2,11 @@
|
|
2
2
|
|
3
3
|
require "ferrum"
|
4
4
|
|
5
|
-
module Coelacanth
|
5
|
+
module Coelacanth::Client
|
6
6
|
# Coelacanth::Client
|
7
|
-
class Client
|
7
|
+
class Ferrum < Coelacanth::Client::Base
|
8
8
|
def initialize(url)
|
9
|
-
|
10
|
-
raise URI::InvalidURIError unless @validator.valid_url?(url)
|
11
|
-
@config = Coelacanth.config
|
9
|
+
super(url)
|
12
10
|
remote_client.goto(url)
|
13
11
|
end
|
14
12
|
|
@@ -32,7 +30,7 @@ module Coelacanth
|
|
32
30
|
def remote_client
|
33
31
|
if @remote_client.nil?
|
34
32
|
headers = @config.read("remote_client.headers")
|
35
|
-
@remote_client = Ferrum::Browser.new(
|
33
|
+
@remote_client = ::Ferrum::Browser.new(
|
36
34
|
ws_url: @config.read("remote_client.ws_url"),
|
37
35
|
timeout: @config.read("remote_client.timeout")
|
38
36
|
).create_page
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "open-uri"
|
4
|
+
require "ferrum"
|
5
|
+
|
6
|
+
module Coelacanth::Client
|
7
|
+
# Coelacanth::Client
|
8
|
+
class ScreenshotOne < Coelacanth::Client::Base
|
9
|
+
def get_response
|
10
|
+
@origin_response = URI(@url).open
|
11
|
+
@status_code = @origin_response.status[0].to_i
|
12
|
+
body = @origin_response.read
|
13
|
+
body
|
14
|
+
rescue OpenURI::HTTPError => e
|
15
|
+
@status_code = e.io.status[0].to_i
|
16
|
+
raise e
|
17
|
+
end
|
18
|
+
|
19
|
+
def get_screenshot
|
20
|
+
api_key = @config.read("screenshot_one.key")
|
21
|
+
uri = URI("https://api.screenshotone.com/take")
|
22
|
+
params = {
|
23
|
+
access_key: api_key,
|
24
|
+
url: @url,
|
25
|
+
format: "jpg",
|
26
|
+
block_ads: true,
|
27
|
+
block_cookie_banners: true,
|
28
|
+
block_banners_by_heuristics: false,
|
29
|
+
block_trackers: true,
|
30
|
+
delay: 0,
|
31
|
+
timeout: 60,
|
32
|
+
response_type: "by_format",
|
33
|
+
image_quality: 80
|
34
|
+
}
|
35
|
+
uri.query = URI.encode_www_form(params)
|
36
|
+
|
37
|
+
response = Net::HTTP.get_response(uri)
|
38
|
+
raise "Failed to fetch screenshot: #{response.code}" unless response.is_a?(Net::HTTPSuccess)
|
39
|
+
|
40
|
+
response.body
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
data/lib/coelacanth/dom.rb
CHANGED
data/lib/coelacanth/version.rb
CHANGED
data/lib/coelacanth.rb
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
require "net/http"
|
4
4
|
require_relative "coelacanth/configure"
|
5
|
-
require_relative "coelacanth/client"
|
5
|
+
require_relative "coelacanth/client/base"
|
6
|
+
require_relative "coelacanth/client/ferrum"
|
7
|
+
require_relative "coelacanth/client/screenshot_one"
|
6
8
|
require_relative "coelacanth/dom"
|
7
9
|
require_relative "coelacanth/redirect"
|
8
10
|
require_relative "coelacanth/validator"
|
@@ -15,7 +17,8 @@ module Coelacanth
|
|
15
17
|
class DeepRedirectError < StandardError; end
|
16
18
|
|
17
19
|
def self.analyze(url)
|
18
|
-
|
20
|
+
client_class = config.read("client") == "screenshot_one" ? Client::ScreenshotOne : Client::Ferrum
|
21
|
+
@client = client_class.new(url)
|
19
22
|
regular_url = Redirect.new.resolve_redirect(url)
|
20
23
|
{
|
21
24
|
dom: Dom.new.oga(regular_url),
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: coelacanth
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yusuke
|
8
|
-
autorequire:
|
9
8
|
bindir: exe
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies: []
|
13
12
|
description: |
|
14
13
|
coelacanth is a gem that allows you to easily parse and analyze web pages,
|
@@ -31,7 +30,9 @@ files:
|
|
31
30
|
- compose.yml
|
32
31
|
- config/coelacanth.yml
|
33
32
|
- lib/coelacanth.rb
|
34
|
-
- lib/coelacanth/client.rb
|
33
|
+
- lib/coelacanth/client/base.rb
|
34
|
+
- lib/coelacanth/client/ferrum.rb
|
35
|
+
- lib/coelacanth/client/screenshot_one.rb
|
35
36
|
- lib/coelacanth/configure.rb
|
36
37
|
- lib/coelacanth/dom.rb
|
37
38
|
- lib/coelacanth/redirect.rb
|
@@ -44,7 +45,6 @@ metadata:
|
|
44
45
|
homepage_uri: https://github.com/slidict/coelacanth
|
45
46
|
source_code_uri: https://github.com/slidict/coelacanth
|
46
47
|
changelog_uri: https://github.com/slidict/coelacanth/releases
|
47
|
-
post_install_message:
|
48
48
|
rdoc_options: []
|
49
49
|
require_paths:
|
50
50
|
- lib
|
@@ -59,8 +59,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
59
59
|
- !ruby/object:Gem::Version
|
60
60
|
version: '0'
|
61
61
|
requirements: []
|
62
|
-
rubygems_version: 3.
|
63
|
-
signing_key:
|
62
|
+
rubygems_version: 3.6.7
|
64
63
|
specification_version: 4
|
65
64
|
summary: A gem for analyzing and extracting statistics from web pages.
|
66
65
|
test_files: []
|