spidy 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/spidy +15 -14
- data/lib/spidy/connector/html.rb +22 -7
- data/lib/spidy/definition.rb +18 -12
- data/lib/spidy/shell.rb +1 -1
- data/lib/spidy/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 41b1f40d3ae33eeac273d40a57b956be37971bbb940280c1ca5c9faf0371fbeb
|
4
|
+
data.tar.gz: e55d03b8703142255caf2eb74b927f4dcfdc58b02a96bb51fcb2f1fa0e9e4167
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9577e5315da4a33a4becb1a9c3d41788729fa5dcc8d1a8f1fe774a36a9c34503c7d4f35ee9c20efb99148ac8fe59d9ed69c47e8da20d4590a153b743cc1ea7d4
|
7
|
+
data.tar.gz: ff454254636e05f6df17d8b35d591ad32ffb6de66bd654a2ed34006eb73a87fa9e948539a0b954ae8bf82d30860966066f3947bc34559e783f617c90fa187715
|
data/exe/spidy
CHANGED
@@ -2,22 +2,23 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'spidy'
|
5
|
+
require 'pry'
|
5
6
|
|
6
|
-
if ARGV[
|
7
|
-
|
8
|
-
|
7
|
+
if ARGV[1].blank?
|
8
|
+
case ARGV[0]
|
9
|
+
when 'version' then STDOUT.puts(Spidy::VERSION)
|
10
|
+
when 'console' then Spidy.console
|
9
11
|
else
|
10
|
-
|
12
|
+
STDOUT.puts 'usage: spidy [version console]'
|
11
13
|
end
|
12
|
-
return
|
13
|
-
end
|
14
|
-
|
15
|
-
shell = Spidy.open(ARGV[1]).shell
|
16
|
-
|
17
|
-
case ARGV[0]&.to_sym
|
18
|
-
when :shell then shell.function
|
19
|
-
when :call then shell.call(ARGV[2])
|
20
|
-
when :each then shell.each(ARGV[2])
|
21
14
|
else
|
22
|
-
|
15
|
+
spidy = Spidy.open(ARGV[1])
|
16
|
+
case ARGV[0]
|
17
|
+
when 'console' then spidy.console
|
18
|
+
when 'shell' then spidy.shell.function
|
19
|
+
when 'call' then spidy.shell.call(ARGV[2])
|
20
|
+
when 'each' then spidy.shell.each(ARGV[2])
|
21
|
+
else
|
22
|
+
STDOUT.puts 'usage: spidy [call each shell] [file]'
|
23
|
+
end
|
23
24
|
end
|
data/lib/spidy/connector/html.rb
CHANGED
@@ -13,13 +13,28 @@ module Spidy::Connector::Html
|
|
13
13
|
'Safari/537.36'
|
14
14
|
].join(' ')
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
@agent = Mechanize.new
|
17
|
+
@agent.user_agent = USER_AGENT
|
18
|
+
|
19
|
+
class << self
|
20
|
+
def call(url, encoding: nil, &yielder)
|
21
|
+
if encoding
|
22
|
+
@agent.default_encoding = encoding
|
23
|
+
@agent.force_default_encoding = true
|
24
|
+
end
|
25
|
+
get(url, yielder)
|
26
|
+
end
|
27
|
+
|
28
|
+
def get(url, yielder)
|
29
|
+
@agent.get(url, &yielder)
|
30
|
+
rescue Mechanize::ResponseCodeError => e
|
31
|
+
case e.response_code
|
32
|
+
when '429'
|
33
|
+
sleep 2
|
34
|
+
@agent.get(url, &yielder)
|
35
|
+
else
|
36
|
+
raise e
|
37
|
+
end
|
21
38
|
end
|
22
|
-
agent.user_agent = USER_AGENT
|
23
|
-
agent.get(url, &yielder)
|
24
39
|
end
|
25
40
|
end
|
data/lib/spidy/definition.rb
CHANGED
@@ -8,23 +8,29 @@ class Spidy::Definition
|
|
8
8
|
class_attribute :spiders, default: {}
|
9
9
|
|
10
10
|
class << self
|
11
|
-
def
|
11
|
+
def spider(name = :default, connector: nil, as: nil)
|
12
12
|
connector = Spidy::Connector.get(as || connector) || connector
|
13
|
-
|
14
|
-
|
15
|
-
connection_yielder = lambda do |resource|
|
16
|
-
binder.call(resource, define_block) do |object|
|
17
|
-
yielder.call(object)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
connector.call(url, &connection_yielder)
|
13
|
+
spiders[name] = proc do |url, &yielder|
|
14
|
+
yield(yielder, connector, url)
|
21
15
|
end
|
22
16
|
end
|
23
17
|
|
24
|
-
def
|
18
|
+
def define(name = :default, connector: nil, binder: nil, as: nil, &define_block)
|
25
19
|
connector = Spidy::Connector.get(as || connector) || connector
|
26
|
-
|
27
|
-
|
20
|
+
binder = Spidy::Binder.get(as || binder) || binder
|
21
|
+
namespace[name] = define_proc(connector, binder, define_block)
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def define_proc(connector, binder, define_block)
|
27
|
+
proc do |url, &yielder|
|
28
|
+
fail 'invalid argument [Required url / block]' if url.blank? || yielder.nil?
|
29
|
+
|
30
|
+
connection_yielder = lambda do |resource|
|
31
|
+
binder.call(resource, define_block) { |object| yielder.call(object) }
|
32
|
+
end
|
33
|
+
connector.call(url, &connection_yielder)
|
28
34
|
end
|
29
35
|
end
|
30
36
|
end
|
data/lib/spidy/shell.rb
CHANGED
data/lib/spidy/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|