spidy 0.3.12 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +53 -8
- data/.ruby-version +1 -1
- data/CLAUDE.md +28 -0
- data/Gemfile +20 -2
- data/Gemfile.lock +178 -69
- data/README.md +96 -17
- data/Rakefile +0 -2
- data/bin/console +2 -3
- data/example/check_ferrum.rb +114 -0
- data/example/check_lightpanda.rb +59 -0
- data/example/connect_test.rb +48 -0
- data/example/lightpanda_links.rb +80 -0
- data/example/master_detail.rb +1 -3
- data/example/proxy.rb +0 -2
- data/example/retry.rb +0 -2
- data/example/run_with_lightpanda.rb +25 -0
- data/example/simple_test.rb +53 -0
- data/example/test_lightpanda.rb +86 -0
- data/example/wikip.rb +2 -4
- data/exe/spidy +0 -3
- data/lib/spidy/binder/error.rb +0 -2
- data/lib/spidy/binder/html.rb +0 -2
- data/lib/spidy/binder/json.rb +0 -2
- data/lib/spidy/binder/xml.rb +0 -2
- data/lib/spidy/binder.rb +0 -2
- data/lib/spidy/command_line.rb +4 -6
- data/lib/spidy/connector/direct.rb +0 -2
- data/lib/spidy/connector/html.rb +0 -2
- data/lib/spidy/connector/json.rb +2 -4
- data/lib/spidy/connector/lightpanda.rb +161 -0
- data/lib/spidy/connector/xml.rb +4 -6
- data/lib/spidy/connector.rb +7 -8
- data/lib/spidy/console.rb +0 -2
- data/lib/spidy/definition.rb +2 -4
- data/lib/spidy/definition_file.rb +0 -2
- data/lib/spidy/definition_object.rb +0 -2
- data/lib/spidy/shell.rb +6 -3
- data/lib/spidy/spider.rb +2 -4
- data/lib/spidy/version.rb +1 -3
- data/lib/spidy.rb +3 -5
- data/spidy.gemspec +4 -17
- metadata +16 -138
- data/.rubocop_todo.yml +0 -13
@@ -0,0 +1,161 @@
|
|
1
|
+
#
|
2
|
+
# Lightpanda connector for JavaScript-rendered pages via CDP
|
3
|
+
# Using Ferrum for direct CDP connection
|
4
|
+
#
|
5
|
+
class Spidy::Connector::Lightpanda
|
6
|
+
include Spidy::Connector::StaticAccessor
|
7
|
+
|
8
|
+
attr_reader :user_agent, :host, :port
|
9
|
+
|
10
|
+
DEFAULT_HOST = '127.0.0.1'.freeze
|
11
|
+
DEFAULT_PORT = 9222
|
12
|
+
|
13
|
+
def initialize(user_agent:, host: nil, port: nil)
|
14
|
+
begin
|
15
|
+
require 'ferrum'
|
16
|
+
rescue LoadError
|
17
|
+
raise 'Ferrum gem is required. Please install with: gem install ferrum'
|
18
|
+
end
|
19
|
+
|
20
|
+
@user_agent = user_agent
|
21
|
+
@host = host || ENV['LIGHTPANDA_HOST'] || DEFAULT_HOST
|
22
|
+
@port = port || ENV['LIGHTPANDA_PORT'] || DEFAULT_PORT
|
23
|
+
end
|
24
|
+
|
25
|
+
def call(url)
|
26
|
+
fail 'url is not specified' if url.blank?
|
27
|
+
|
28
|
+
# Clean the URL by removing any whitespace or newlines
|
29
|
+
clean_url = url.to_s.strip
|
30
|
+
|
31
|
+
puts "Processing URL: #{clean_url}" if ENV['DEBUG']
|
32
|
+
|
33
|
+
# Create a page-like object similar to Mechanize
|
34
|
+
page = fetch_with_ferrum(clean_url)
|
35
|
+
|
36
|
+
# Apply yielder to the page
|
37
|
+
yield(page)
|
38
|
+
end
|
39
|
+
|
40
|
+
def refresh!
|
41
|
+
# No special refresh actions needed for Lightpanda
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
# Try to wait for network to be idle
|
47
|
+
def wait_for_network_idle(browser)
|
48
|
+
puts 'Waiting for network idle...' if ENV['DEBUG']
|
49
|
+
|
50
|
+
begin
|
51
|
+
# Try with timeout parameter
|
52
|
+
browser.network.wait_for_idle(timeout: 15)
|
53
|
+
rescue ArgumentError
|
54
|
+
begin
|
55
|
+
# Try without timeout parameter
|
56
|
+
browser.network.wait_for_idle
|
57
|
+
rescue StandardError => e
|
58
|
+
# If wait_for_idle fails, fall back to a simple sleep
|
59
|
+
puts "Warning: Could not wait for network idle: #{e.message}" if ENV['DEBUG']
|
60
|
+
sleep 5 # Simple fallback
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# Navigate to URL and get page content
|
66
|
+
def navigate_and_get_content(browser, url)
|
67
|
+
# Navigate to the URL
|
68
|
+
puts "Navigating to: #{url}" if ENV['DEBUG']
|
69
|
+
browser.goto(url)
|
70
|
+
|
71
|
+
# Wait for network idle
|
72
|
+
wait_for_network_idle(browser)
|
73
|
+
|
74
|
+
# Get the content
|
75
|
+
puts 'Getting page content...' if ENV['DEBUG']
|
76
|
+
browser.body
|
77
|
+
end
|
78
|
+
|
79
|
+
# Create browser options
|
80
|
+
def create_browser_options
|
81
|
+
options = {
|
82
|
+
headless: true, # Run in headless mode
|
83
|
+
timeout: 20, # Increase timeout
|
84
|
+
process_timeout: 20, # Process timeout
|
85
|
+
window_size: [1280, 800] # Set window size
|
86
|
+
}
|
87
|
+
|
88
|
+
# Add Chrome path if available
|
89
|
+
options[:browser_path] = ENV['CHROME_PATH'] if ENV['CHROME_PATH'] && File.exist?(ENV['CHROME_PATH'])
|
90
|
+
|
91
|
+
options
|
92
|
+
end
|
93
|
+
|
94
|
+
# Connect to CDP server with Ferrum and fetch the page
|
95
|
+
def fetch_with_ferrum(url) # rubocop:todo Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
96
|
+
puts 'Using direct Chrome/Chromium instead of Lightpanda' if ENV['DEBUG']
|
97
|
+
browser = nil
|
98
|
+
html_content = nil
|
99
|
+
|
100
|
+
begin
|
101
|
+
# Create Ferrum browser
|
102
|
+
browser = Ferrum::Browser.new(create_browser_options)
|
103
|
+
|
104
|
+
# Skip user agent setting - not supported in this Ferrum version
|
105
|
+
if @user_agent.present? && ENV.fetch('DEBUG', nil)
|
106
|
+
puts 'User agent setting will be skipped - not supported in your Ferrum version'
|
107
|
+
end
|
108
|
+
|
109
|
+
# Navigate and get content
|
110
|
+
html_content = navigate_and_get_content(browser, url)
|
111
|
+
rescue StandardError => e
|
112
|
+
puts "Error during page navigation: #{e.class} - #{e.message}" if ENV['DEBUG']
|
113
|
+
raise e
|
114
|
+
ensure
|
115
|
+
# Clean up - ensure browser is always closed, even if an error occurred
|
116
|
+
browser&.quit if defined?(browser) && browser
|
117
|
+
end
|
118
|
+
|
119
|
+
# Create a Mechanize-like page object
|
120
|
+
LightpandaPage.new(url, html_content)
|
121
|
+
end
|
122
|
+
|
123
|
+
# Page-like object that mimics the Mechanize::Page interface
|
124
|
+
class LightpandaPage
|
125
|
+
attr_reader :uri, :body, :title, :code, :response_code
|
126
|
+
|
127
|
+
def initialize(url, html_content)
|
128
|
+
@uri = url
|
129
|
+
@body = html_content
|
130
|
+
@doc = Nokogiri::HTML(html_content)
|
131
|
+
@title = @doc.title
|
132
|
+
@code = '200'
|
133
|
+
@response_code = '200'
|
134
|
+
end
|
135
|
+
|
136
|
+
# Common methods from Mechanize::Page that might be used in the application
|
137
|
+
def search(*)
|
138
|
+
@doc.search(*)
|
139
|
+
end
|
140
|
+
|
141
|
+
def at(*)
|
142
|
+
@doc.at(*)
|
143
|
+
end
|
144
|
+
|
145
|
+
def css(*)
|
146
|
+
@doc.css(*)
|
147
|
+
end
|
148
|
+
|
149
|
+
def xpath(*)
|
150
|
+
@doc.xpath(*)
|
151
|
+
end
|
152
|
+
|
153
|
+
def encoding
|
154
|
+
@doc.encoding
|
155
|
+
end
|
156
|
+
|
157
|
+
def try(*args)
|
158
|
+
send(*args) if respond_to?(args.first)
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
data/lib/spidy/connector/xml.rb
CHANGED
@@ -1,20 +1,18 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
#
|
4
2
|
# xml
|
5
3
|
#
|
6
4
|
class Spidy::Connector::Xml
|
7
5
|
include Spidy::Connector::StaticAccessor
|
8
6
|
|
9
|
-
def call(url, &
|
7
|
+
def call(url, &)
|
10
8
|
fail 'URL is undefined' if url.blank?
|
11
9
|
|
12
|
-
connect(url, &
|
10
|
+
connect(url, &)
|
13
11
|
end
|
14
12
|
|
15
|
-
def connect(url
|
13
|
+
def connect(url)
|
16
14
|
OpenURI.open_uri(url, 'User-Agent' => @user_agent) do |body|
|
17
|
-
|
15
|
+
yield Nokogiri::XML(body.read.gsub(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/, ''), url)
|
18
16
|
end
|
19
17
|
rescue OpenURI::HTTPError => e
|
20
18
|
raise Spidy::Connector::Retry.new(error: e, response_code: e.io.status[0])
|
data/lib/spidy/connector.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
#
|
4
2
|
# This class is responsible for actually making a network connection and downloading hypertext
|
5
3
|
#
|
@@ -9,6 +7,7 @@ module Spidy::Connector
|
|
9
7
|
autoload :Html
|
10
8
|
autoload :Json
|
11
9
|
autoload :Xml
|
10
|
+
autoload :Lightpanda
|
12
11
|
|
13
12
|
DEFAULT_WAIT_TIME = 5
|
14
13
|
|
@@ -35,9 +34,9 @@ module Spidy::Connector
|
|
35
34
|
module StaticAccessor
|
36
35
|
extend ActiveSupport::Concern
|
37
36
|
class_methods do
|
38
|
-
def call(url, wait_time: 5, logger: Spidy::Connector::DEFAULT_LOGGER, user_agent: Spidy::Connector::USER_AGENT, &
|
37
|
+
def call(url, wait_time: 5, logger: Spidy::Connector::DEFAULT_LOGGER, user_agent: Spidy::Connector::USER_AGENT, &)
|
39
38
|
::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(
|
40
|
-
url, &
|
39
|
+
url, &
|
41
40
|
)
|
42
41
|
end
|
43
42
|
end
|
@@ -75,9 +74,9 @@ module Spidy::Connector
|
|
75
74
|
connect(url, &block)
|
76
75
|
end
|
77
76
|
|
78
|
-
def connect(url, retry_attempt_count: @retry_attempt_count, &
|
77
|
+
def connect(url, retry_attempt_count: @retry_attempt_count, &)
|
79
78
|
logger.call('connnector.get': url, 'connnector.accessed': Time.current)
|
80
|
-
origin_connector.call(url, &
|
79
|
+
origin_connector.call(url, &)
|
81
80
|
rescue Spidy::Connector::Retry => e
|
82
81
|
logger.call('retry.accessed': Time.current,
|
83
82
|
'retry.uri': url,
|
@@ -105,9 +104,9 @@ module Spidy::Connector
|
|
105
104
|
@socks_proxy = socks_proxy
|
106
105
|
end
|
107
106
|
|
108
|
-
def call(url, &
|
107
|
+
def call(url, &)
|
109
108
|
Socksify.proxy(socks_proxy[:host], socks_proxy[:port]) do
|
110
|
-
connector.call(url, &
|
109
|
+
connector.call(url, &)
|
111
110
|
end
|
112
111
|
end
|
113
112
|
|
data/lib/spidy/console.rb
CHANGED
data/lib/spidy/definition.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
#
|
4
2
|
# Class representing a website defined by DSL
|
5
3
|
#
|
@@ -42,12 +40,12 @@ module Spidy::Definition
|
|
42
40
|
end
|
43
41
|
end
|
44
42
|
|
45
|
-
def spider(name = :default, connector: nil, as: nil
|
43
|
+
def spider(name = :default, connector: nil, as: nil)
|
46
44
|
@namespace ||= {}
|
47
45
|
connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
|
48
46
|
socks_proxy: @socks_proxy)
|
49
47
|
@namespace[:"#{name}_spider"] = proc do |source, &yielder|
|
50
|
-
|
48
|
+
yield(yielder, connector, source)
|
51
49
|
end
|
52
50
|
end
|
53
51
|
|
data/lib/spidy/shell.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
#
|
4
2
|
# spidy Shell
|
5
3
|
#
|
@@ -9,7 +7,12 @@ class Spidy::Shell
|
|
9
7
|
end
|
10
8
|
|
11
9
|
def interactive
|
12
|
-
|
10
|
+
console = Spidy::Console.new(@definition_file)
|
11
|
+
require 'irb'
|
12
|
+
IRB.setup(nil)
|
13
|
+
irb = IRB::Irb.new(IRB::WorkSpace.new(console))
|
14
|
+
IRB.conf[:MAIN_CONTEXT] = irb.context
|
15
|
+
irb.eval_input
|
13
16
|
end
|
14
17
|
|
15
18
|
def command_line
|
data/lib/spidy/spider.rb
CHANGED
data/lib/spidy/version.rb
CHANGED
data/lib/spidy.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'spidy/version'
|
1
|
+
require_relative 'spidy/version'
|
4
2
|
require 'active_support/all'
|
5
3
|
require 'mechanize'
|
6
4
|
require 'open-uri'
|
@@ -29,11 +27,11 @@ module Spidy
|
|
29
27
|
Spidy::DefinitionFile.open(filepath).spidy
|
30
28
|
end
|
31
29
|
|
32
|
-
def self.define(&
|
30
|
+
def self.define(&)
|
33
31
|
spidy = Module.new do
|
34
32
|
class_eval do
|
35
33
|
extend ::Spidy::Definition
|
36
|
-
module_eval(&
|
34
|
+
module_eval(&)
|
37
35
|
end
|
38
36
|
end
|
39
37
|
spidy.instance_eval do
|
data/spidy.gemspec
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
lib = File.expand_path('lib', __dir__)
|
4
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
3
|
require 'spidy/version'
|
@@ -23,21 +21,10 @@ Gem::Specification.new do |spec|
|
|
23
21
|
spec.bindir = 'exe'
|
24
22
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
25
23
|
spec.require_paths = ['lib']
|
26
|
-
|
27
|
-
spec.
|
28
|
-
spec.
|
29
|
-
spec.
|
30
|
-
spec.add_development_dependency 'pry'
|
31
|
-
spec.add_development_dependency 'rake', '~> 13.0'
|
32
|
-
spec.add_development_dependency 'rspec', '~> 3.0'
|
33
|
-
spec.add_development_dependency 'rspec-command'
|
34
|
-
spec.add_development_dependency 'sinatra'
|
35
|
-
|
36
|
-
spec.add_runtime_dependency 'activesupport'
|
37
|
-
spec.add_runtime_dependency 'mechanize'
|
38
|
-
spec.add_runtime_dependency 'pry'
|
39
|
-
spec.add_runtime_dependency 'socksify'
|
40
|
-
spec.add_runtime_dependency 'tor'
|
24
|
+
spec.add_dependency 'activesupport', '~> 7.1'
|
25
|
+
spec.add_dependency 'mechanize'
|
26
|
+
spec.add_dependency 'socksify'
|
27
|
+
spec.add_dependency 'tor'
|
41
28
|
spec.metadata = {
|
42
29
|
'rubygems_mfa_required' => 'true'
|
43
30
|
}
|
metadata
CHANGED
@@ -1,141 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
|
-
autorequire:
|
9
8
|
bindir: exe
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-04-15 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '2.0'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '2.0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: capybara_discoball
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: ffaker
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: pry
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rake
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - "~>"
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '13.0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - "~>"
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '13.0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rspec
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '3.0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - "~>"
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '3.0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: rspec-command
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: sinatra
|
113
|
-
requirement: !ruby/object:Gem::Requirement
|
114
|
-
requirements:
|
115
|
-
- - ">="
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '0'
|
118
|
-
type: :development
|
119
|
-
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - ">="
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: '0'
|
125
12
|
- !ruby/object:Gem::Dependency
|
126
13
|
name: activesupport
|
127
14
|
requirement: !ruby/object:Gem::Requirement
|
128
15
|
requirements:
|
129
|
-
- - "
|
16
|
+
- - "~>"
|
130
17
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
18
|
+
version: '7.1'
|
132
19
|
type: :runtime
|
133
20
|
prerelease: false
|
134
21
|
version_requirements: !ruby/object:Gem::Requirement
|
135
22
|
requirements:
|
136
|
-
- - "
|
23
|
+
- - "~>"
|
137
24
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
25
|
+
version: '7.1'
|
139
26
|
- !ruby/object:Gem::Dependency
|
140
27
|
name: mechanize
|
141
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,20 +37,6 @@ dependencies:
|
|
150
37
|
- - ">="
|
151
38
|
- !ruby/object:Gem::Version
|
152
39
|
version: '0'
|
153
|
-
- !ruby/object:Gem::Dependency
|
154
|
-
name: pry
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - ">="
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '0'
|
160
|
-
type: :runtime
|
161
|
-
prerelease: false
|
162
|
-
version_requirements: !ruby/object:Gem::Requirement
|
163
|
-
requirements:
|
164
|
-
- - ">="
|
165
|
-
- !ruby/object:Gem::Version
|
166
|
-
version: '0'
|
167
40
|
- !ruby/object:Gem::Dependency
|
168
41
|
name: socksify
|
169
42
|
requirement: !ruby/object:Gem::Requirement
|
@@ -192,7 +65,6 @@ dependencies:
|
|
192
65
|
- - ">="
|
193
66
|
- !ruby/object:Gem::Version
|
194
67
|
version: '0'
|
195
|
-
description:
|
196
68
|
email:
|
197
69
|
- aileron.cc@gmail.com
|
198
70
|
executables:
|
@@ -203,10 +75,10 @@ files:
|
|
203
75
|
- ".gitignore"
|
204
76
|
- ".rspec"
|
205
77
|
- ".rubocop.yml"
|
206
|
-
- ".rubocop_todo.yml"
|
207
78
|
- ".ruby-version"
|
208
79
|
- ".travis.yml"
|
209
80
|
- CHANGELOG.md
|
81
|
+
- CLAUDE.md
|
210
82
|
- CODE_OF_CONDUCT.md
|
211
83
|
- Gemfile
|
212
84
|
- Gemfile.lock
|
@@ -215,9 +87,16 @@ files:
|
|
215
87
|
- Rakefile
|
216
88
|
- bin/console
|
217
89
|
- bin/setup
|
90
|
+
- example/check_ferrum.rb
|
91
|
+
- example/check_lightpanda.rb
|
92
|
+
- example/connect_test.rb
|
93
|
+
- example/lightpanda_links.rb
|
218
94
|
- example/master_detail.rb
|
219
95
|
- example/proxy.rb
|
220
96
|
- example/retry.rb
|
97
|
+
- example/run_with_lightpanda.rb
|
98
|
+
- example/simple_test.rb
|
99
|
+
- example/test_lightpanda.rb
|
221
100
|
- example/wikip.rb
|
222
101
|
- exe/spidy
|
223
102
|
- lib/spidy.rb
|
@@ -231,6 +110,7 @@ files:
|
|
231
110
|
- lib/spidy/connector/direct.rb
|
232
111
|
- lib/spidy/connector/html.rb
|
233
112
|
- lib/spidy/connector/json.rb
|
113
|
+
- lib/spidy/connector/lightpanda.rb
|
234
114
|
- lib/spidy/connector/xml.rb
|
235
115
|
- lib/spidy/console.rb
|
236
116
|
- lib/spidy/definition.rb
|
@@ -246,7 +126,6 @@ licenses:
|
|
246
126
|
- MIT
|
247
127
|
metadata:
|
248
128
|
rubygems_mfa_required: 'true'
|
249
|
-
post_install_message:
|
250
129
|
rdoc_options: []
|
251
130
|
require_paths:
|
252
131
|
- lib
|
@@ -261,8 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
261
140
|
- !ruby/object:Gem::Version
|
262
141
|
version: '0'
|
263
142
|
requirements: []
|
264
|
-
rubygems_version: 3.
|
265
|
-
signing_key:
|
143
|
+
rubygems_version: 3.6.6
|
266
144
|
specification_version: 4
|
267
145
|
summary: web spider dsl
|
268
146
|
test_files: []
|
data/.rubocop_todo.yml
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
# This configuration was generated by
|
2
|
-
# `rubocop --auto-gen-config`
|
3
|
-
# on 2019-03-29 18:00:03 +0900 using RuboCop version 0.66.0.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 7
|
10
|
-
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
11
|
-
# URISchemes: http, https
|
12
|
-
Metrics/LineLength:
|
13
|
-
Max: 96
|