spidy 0.1.5 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +23 -14
- data/README.md +16 -1
- data/exe/spidy +4 -4
- data/lib/spidy.rb +9 -12
- data/lib/spidy/binder.rb +33 -2
- data/lib/spidy/binder/html.rb +12 -31
- data/lib/spidy/binder/json.rb +15 -32
- data/lib/spidy/binder/xml.rb +12 -31
- data/lib/spidy/command_line.rb +94 -0
- data/lib/spidy/connector/html.rb +1 -0
- data/lib/spidy/console.rb +4 -5
- data/lib/spidy/definition.rb +4 -5
- data/lib/spidy/definition_file.rb +1 -1
- data/lib/spidy/shell.rb +8 -84
- data/lib/spidy/version.rb +1 -1
- data/spidy.gemspec +1 -0
- metadata +18 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d95a2c20a93c9c60969c4a738200391443f2e797273988d7aaa9a7abc812350
|
4
|
+
data.tar.gz: 789cbc87df6997b5fbdb53db4b3fe7c91b6e899b995d232a227a2395c53f67c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e811d34caebb96b1b1d0247ff1a547ed10e5e8a27d82df105c8a809a4b96011e0660805075e8864100d8df29545537c2dde3d3b742e5c35fae33e3a847ad5e9
|
7
|
+
data.tar.gz: fa2f8c4039b009dfc746384dac40028956b880efa96d479485e85b702aad718f4b90e8b44b04912b8aa2bb94d54f27f50570978e2d07078943120bce206e38be
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.6.6
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
spidy (0.
|
4
|
+
spidy (0.2.3)
|
5
5
|
activesupport
|
6
6
|
mechanize
|
7
7
|
pry
|
@@ -9,22 +9,22 @@ PATH
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
|
-
activesupport (6.0.2
|
12
|
+
activesupport (6.0.3.2)
|
13
13
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
14
14
|
i18n (>= 0.7, < 2)
|
15
15
|
minitest (~> 5.1)
|
16
16
|
tzinfo (~> 1.1)
|
17
|
-
zeitwerk (~> 2.2)
|
17
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
18
18
|
coderay (1.1.2)
|
19
|
-
concurrent-ruby (1.1.
|
20
|
-
connection_pool (2.2.
|
19
|
+
concurrent-ruby (1.1.6)
|
20
|
+
connection_pool (2.2.3)
|
21
21
|
diff-lcs (1.3)
|
22
22
|
domain_name (0.5.20190701)
|
23
23
|
unf (>= 0.0.5, < 1.0.0)
|
24
24
|
ffaker (2.10.0)
|
25
25
|
http-cookie (1.0.3)
|
26
26
|
domain_name (~> 0.5)
|
27
|
-
i18n (1.8.
|
27
|
+
i18n (1.8.5)
|
28
28
|
concurrent-ruby (~> 1.0)
|
29
29
|
mechanize (2.7.6)
|
30
30
|
domain_name (~> 0.5, >= 0.5.1)
|
@@ -38,13 +38,14 @@ GEM
|
|
38
38
|
method_source (0.9.2)
|
39
39
|
mime-types (3.3.1)
|
40
40
|
mime-types-data (~> 3.2015)
|
41
|
-
mime-types-data (3.
|
41
|
+
mime-types-data (3.2020.0512)
|
42
42
|
mini_portile2 (2.4.0)
|
43
|
-
minitest (5.14.
|
43
|
+
minitest (5.14.1)
|
44
|
+
mixlib-shellout (2.4.4)
|
44
45
|
net-http-digest_auth (1.4.1)
|
45
|
-
net-http-persistent (
|
46
|
+
net-http-persistent (4.0.0)
|
46
47
|
connection_pool (~> 2.2)
|
47
|
-
nokogiri (1.10.
|
48
|
+
nokogiri (1.10.10)
|
48
49
|
mini_portile2 (~> 2.4.0)
|
49
50
|
ntlm-http (0.1.1)
|
50
51
|
pry (0.12.2)
|
@@ -55,23 +56,30 @@ GEM
|
|
55
56
|
rspec-core (~> 3.8.0)
|
56
57
|
rspec-expectations (~> 3.8.0)
|
57
58
|
rspec-mocks (~> 3.8.0)
|
59
|
+
rspec-command (1.0.3)
|
60
|
+
mixlib-shellout (~> 2.0)
|
61
|
+
rspec (~> 3.2)
|
62
|
+
rspec-its (~> 1.2)
|
58
63
|
rspec-core (3.8.2)
|
59
64
|
rspec-support (~> 3.8.0)
|
60
65
|
rspec-expectations (3.8.4)
|
61
66
|
diff-lcs (>= 1.2.0, < 2.0)
|
62
67
|
rspec-support (~> 3.8.0)
|
68
|
+
rspec-its (1.3.0)
|
69
|
+
rspec-core (>= 3.0.0)
|
70
|
+
rspec-expectations (>= 3.0.0)
|
63
71
|
rspec-mocks (3.8.1)
|
64
72
|
diff-lcs (>= 1.2.0, < 2.0)
|
65
73
|
rspec-support (~> 3.8.0)
|
66
74
|
rspec-support (3.8.2)
|
67
75
|
thread_safe (0.3.6)
|
68
|
-
tzinfo (1.2.
|
76
|
+
tzinfo (1.2.7)
|
69
77
|
thread_safe (~> 0.1)
|
70
78
|
unf (0.1.4)
|
71
79
|
unf_ext
|
72
|
-
unf_ext (0.0.7.
|
80
|
+
unf_ext (0.0.7.7)
|
73
81
|
webrobots (0.1.2)
|
74
|
-
zeitwerk (2.
|
82
|
+
zeitwerk (2.4.0)
|
75
83
|
|
76
84
|
PLATFORMS
|
77
85
|
ruby
|
@@ -82,7 +90,8 @@ DEPENDENCIES
|
|
82
90
|
pry
|
83
91
|
rake (~> 10.0)
|
84
92
|
rspec (~> 3.0)
|
93
|
+
rspec-command
|
85
94
|
spidy!
|
86
95
|
|
87
96
|
BUNDLED WITH
|
88
|
-
2.1.
|
97
|
+
2.1.4
|
data/README.md
CHANGED
@@ -44,8 +44,23 @@ cat urls | spidy call website.rb > website.json
|
|
44
44
|
echo 'http://example.com' | spidy each website.rb | spidy call website.rb | jq .
|
45
45
|
```
|
46
46
|
|
47
|
+
### When development console
|
48
|
+
```bash
|
49
|
+
spidy console website.rb
|
50
|
+
```
|
51
|
+
|
52
|
+
### reload source code
|
53
|
+
```
|
54
|
+
pry(#<Spidy::Console>)> reload!
|
55
|
+
```
|
56
|
+
|
57
|
+
```rb
|
58
|
+
each('http://example.com') { |url| break url }
|
59
|
+
call('http://example.com') { |html| break html } # html as nokogiri object ( mechanize )
|
60
|
+
```
|
61
|
+
|
47
62
|
### When used from the ruby code
|
48
|
-
|
63
|
+
```rb
|
49
64
|
a = Spidy.define do
|
50
65
|
# Implementing spiders and scrapers
|
51
66
|
end
|
data/exe/spidy
CHANGED
@@ -7,17 +7,17 @@ require 'pry'
|
|
7
7
|
if ARGV[1].blank?
|
8
8
|
case ARGV[0]
|
9
9
|
when 'version' then STDOUT.puts(Spidy::VERSION)
|
10
|
-
when 'console' then Spidy.
|
10
|
+
when 'console' then Spidy.shell.interactive
|
11
11
|
else
|
12
12
|
STDOUT.puts 'usage: spidy [version console]'
|
13
13
|
end
|
14
14
|
else
|
15
15
|
case ARGV[0]
|
16
|
-
when 'console' then Spidy.
|
17
|
-
when '
|
16
|
+
when 'console' then Spidy.shell(ARGV[1]).interactive
|
17
|
+
when 'function' then Spidy.shell(ARGV[1]).function
|
18
18
|
when 'call' then Spidy.shell(ARGV[1]).call(ARGV[2])
|
19
19
|
when 'each' then Spidy.shell(ARGV[1]).each(ARGV[2])
|
20
20
|
else
|
21
|
-
STDOUT.puts 'usage: spidy [call each
|
21
|
+
STDOUT.puts 'usage: spidy [console function call each] [file]'
|
22
22
|
end
|
23
23
|
end
|
data/lib/spidy.rb
CHANGED
@@ -11,35 +11,32 @@ require 'open-uri'
|
|
11
11
|
module Spidy
|
12
12
|
extend ActiveSupport::Autoload
|
13
13
|
autoload :Shell
|
14
|
+
autoload :CommandLine
|
14
15
|
autoload :Console
|
15
16
|
autoload :Definition
|
16
17
|
autoload :DefinitionFile
|
17
18
|
autoload :Binder
|
18
19
|
autoload :Connector
|
19
20
|
|
20
|
-
def self.
|
21
|
-
|
22
|
-
if filepath
|
23
|
-
Pry.start(Spidy::Console.new(Spidy::DefinitionFile.open(filepath)))
|
24
|
-
else
|
25
|
-
Pry.start(Spidy::Console.new)
|
26
|
-
end
|
21
|
+
def self.shell(filepath = nil)
|
22
|
+
Spidy::Shell.new(filepath)
|
27
23
|
end
|
28
24
|
|
29
25
|
def self.open(filepath)
|
30
26
|
Spidy::DefinitionFile.open(filepath).spidy
|
31
27
|
end
|
32
28
|
|
33
|
-
def self.shell(filepath)
|
34
|
-
Spidy::Shell.new(Spidy::DefinitionFile.open(filepath))
|
35
|
-
end
|
36
|
-
|
37
29
|
def self.define(&block)
|
38
|
-
Module.new do
|
30
|
+
spidy = Module.new do
|
39
31
|
class_eval do
|
40
32
|
extend ::Spidy::Definition
|
41
33
|
module_eval(&block)
|
42
34
|
end
|
43
35
|
end
|
36
|
+
spidy.instance_eval do
|
37
|
+
undef :spider
|
38
|
+
undef :define
|
39
|
+
end
|
40
|
+
spidy
|
44
41
|
end
|
45
42
|
end
|
data/lib/spidy/binder.rb
CHANGED
@@ -9,17 +9,48 @@ module Spidy::Binder
|
|
9
9
|
autoload :Html
|
10
10
|
autoload :Xml
|
11
11
|
|
12
|
+
class Error < StandardError
|
13
|
+
end
|
14
|
+
|
12
15
|
class Caller
|
13
16
|
def initialize(spidy, binder)
|
14
17
|
@spidy = spidy
|
15
18
|
@binder = binder
|
16
19
|
end
|
17
20
|
|
18
|
-
def call(source, url: nil, define: nil)
|
19
|
-
yield Class.new(@binder, &define).new(@spidy, source, url)
|
21
|
+
def call(source, url: nil, define: nil, define_name: nil)
|
22
|
+
yield Class.new(@binder, &define).new(define_name, @spidy, source, url)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Base
|
27
|
+
class << self
|
28
|
+
attr_reader :attribute_names
|
29
|
+
end
|
30
|
+
|
31
|
+
attr_reader :resource, :url
|
32
|
+
|
33
|
+
def initialize(define_name, spidy, resource, url)
|
34
|
+
@define_name = define_name
|
35
|
+
@spidy = spidy
|
36
|
+
@resource = resource
|
37
|
+
@url = url
|
38
|
+
end
|
39
|
+
|
40
|
+
def scraper(name, source)
|
41
|
+
lambda { |&block| @spidy.call(source, name: name, &block) }
|
42
|
+
end
|
43
|
+
|
44
|
+
def to_s
|
45
|
+
to_h.to_json
|
46
|
+
end
|
47
|
+
|
48
|
+
def to_h
|
49
|
+
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
20
50
|
end
|
21
51
|
end
|
22
52
|
|
53
|
+
|
23
54
|
def self.get(spidy, value)
|
24
55
|
return Caller.new(spidy, const_get(value.to_s.classify)) if name.is_a?(String) || name.is_a?(Symbol)
|
25
56
|
|
data/lib/spidy/binder/html.rb
CHANGED
@@ -3,42 +3,23 @@
|
|
3
3
|
#
|
4
4
|
# Bind html and convert to object
|
5
5
|
#
|
6
|
-
class Spidy::Binder::Html
|
7
|
-
|
8
|
-
|
6
|
+
class Spidy::Binder::Html < Spidy::Binder::Base
|
7
|
+
def self.let(name, query = nil, &block)
|
8
|
+
@attribute_names ||= []
|
9
|
+
@attribute_names << name
|
9
10
|
|
10
|
-
|
11
|
-
@attribute_names ||= []
|
12
|
-
@attribute_names << name
|
13
|
-
define_method(name) do
|
14
|
-
return html.at(query)&.text if block.nil?
|
15
|
-
return instance_exec(&block) if query.blank?
|
11
|
+
return define_method(name) { html.at(query)&.text } if block.nil?
|
16
12
|
|
13
|
+
define_method(name) do
|
14
|
+
if query.present?
|
17
15
|
instance_exec(html.at(query), &block)
|
18
|
-
|
19
|
-
|
16
|
+
else
|
17
|
+
instance_exec(&block)
|
20
18
|
end
|
19
|
+
rescue StandardError => e
|
20
|
+
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
alias_method :resource, :html
|
26
|
-
|
27
|
-
def initialize(spidy, html, url)
|
28
|
-
@spidy = spidy
|
29
|
-
@url = url
|
30
|
-
@html = html
|
31
|
-
end
|
32
|
-
|
33
|
-
def scraper(name, source)
|
34
|
-
lambda { |&block| @spidy.call(source, name: name, &block) }
|
35
|
-
end
|
36
|
-
|
37
|
-
def to_s
|
38
|
-
to_h.to_json
|
39
|
-
end
|
40
|
-
|
41
|
-
def to_h
|
42
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
43
|
-
end
|
24
|
+
alias_method :html, :resource
|
44
25
|
end
|
data/lib/spidy/binder/json.rb
CHANGED
@@ -3,40 +3,23 @@
|
|
3
3
|
#
|
4
4
|
# Bind json and convert to object
|
5
5
|
#
|
6
|
-
class Spidy::Binder::Json
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
instance_exec(
|
6
|
+
class Spidy::Binder::Json < Spidy::Binder::Base
|
7
|
+
def self.let(name, *query, &block)
|
8
|
+
@attribute_names ||= []
|
9
|
+
@attribute_names << name
|
10
|
+
|
11
|
+
return define_method(name) { json.dig(*query) } if block.nil?
|
12
|
+
|
13
|
+
define_method(name) do
|
14
|
+
if query.present?
|
15
|
+
instance_exec(json.dig(*query), &block)
|
16
|
+
else
|
17
|
+
instance_exec(&block)
|
18
18
|
end
|
19
|
+
rescue StandardError => e
|
20
|
+
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
22
|
-
|
23
|
-
alias_method :resource, :json
|
24
|
-
|
25
|
-
def initialize(spidy, json, url)
|
26
|
-
@spidy = spidy
|
27
|
-
@json = json
|
28
|
-
@url = url
|
29
|
-
end
|
30
|
-
|
31
|
-
def scraper(name, source)
|
32
|
-
lambda { |&block| @spidy.call(source, name: name, &block) }
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_s
|
36
|
-
to_h.to_json
|
37
|
-
end
|
38
|
-
|
39
|
-
def to_h
|
40
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
41
|
-
end
|
24
|
+
alias_method :json, :resource
|
42
25
|
end
|
data/lib/spidy/binder/xml.rb
CHANGED
@@ -3,42 +3,23 @@
|
|
3
3
|
#
|
4
4
|
# Bind xml and convert to object
|
5
5
|
#
|
6
|
-
class Spidy::Binder::Xml
|
7
|
-
|
8
|
-
|
6
|
+
class Spidy::Binder::Xml < Spidy::Binder::Base
|
7
|
+
def self.let(name, query = nil, &block)
|
8
|
+
@attribute_names ||= []
|
9
|
+
@attribute_names << name
|
9
10
|
|
10
|
-
|
11
|
-
@attribute_names ||= []
|
12
|
-
@attribute_names << name
|
13
|
-
define_method(name) do
|
14
|
-
return xml.at(query)&.text if block.nil?
|
15
|
-
return instance_exec(&block) if query.blank?
|
11
|
+
return define_method(name) { xml.at(query)&.text } if block.nil?
|
16
12
|
|
13
|
+
define_method(name) do
|
14
|
+
if query.present?
|
17
15
|
instance_exec(xml.at(query), &block)
|
18
|
-
|
19
|
-
|
16
|
+
else
|
17
|
+
instance_exec(&block)
|
20
18
|
end
|
19
|
+
rescue StandardError => e
|
20
|
+
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
|
25
|
-
alias_method :resource, :xml
|
26
|
-
|
27
|
-
def initialize(spidy, xml, url)
|
28
|
-
@spidy = spidy
|
29
|
-
@xml = xml
|
30
|
-
@url = url
|
31
|
-
end
|
32
|
-
|
33
|
-
def scraper(name, source)
|
34
|
-
lambda { |&block| @spidy.call(source, name: name, &block) }
|
35
|
-
end
|
36
|
-
|
37
|
-
def to_s
|
38
|
-
to_h.to_json
|
39
|
-
end
|
40
|
-
|
41
|
-
def to_h
|
42
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
43
|
-
end
|
24
|
+
alias_method :xml, :resource
|
44
25
|
end
|
data/lib/spidy/command_line.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# spidy shell interface
|
5
|
+
#
|
6
|
+
class Spidy::CommandLine
|
7
|
+
delegate :spidy, to: :@definition_file
|
8
|
+
class_attribute :output, default: (proc { |result| STDOUT.puts(result.to_s) })
|
9
|
+
class_attribute :error_handler, default: (proc { |e, url| STDERR.puts({ url: url, message: e.message, backtrace: e.backtrace }.to_json) })
|
10
|
+
|
11
|
+
def initialize(definition_file)
|
12
|
+
@definition_file = definition_file
|
13
|
+
raise 'unloaded spidy' if definition_file.spidy.nil?
|
14
|
+
end
|
15
|
+
|
16
|
+
def each_stdin_lines(name)
|
17
|
+
STDIN.each_line do |url|
|
18
|
+
begin
|
19
|
+
spidy.each(url.strip, name: name, &output)
|
20
|
+
rescue => e
|
21
|
+
error_handler.call(e, url)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def call_stdin_lines(name)
|
27
|
+
STDIN.each_line do |url|
|
28
|
+
begin
|
29
|
+
spidy.call(url.strip, name: name, &output)
|
30
|
+
rescue => e
|
31
|
+
error_handler.call(e, url)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def call(name)
|
37
|
+
return call_stdin_lines(name) if FileTest.pipe?(STDIN)
|
38
|
+
spidy.call(name: name, &output) unless FileTest.pipe?(STDIN)
|
39
|
+
rescue => e
|
40
|
+
error_handler.call(e, nil)
|
41
|
+
end
|
42
|
+
|
43
|
+
def each(name)
|
44
|
+
return each_stdin_lines(name) if FileTest.pipe?(STDIN)
|
45
|
+
spidy.each(name: name, &output)
|
46
|
+
rescue => e
|
47
|
+
error_handler.call(e, nil)
|
48
|
+
end
|
49
|
+
|
50
|
+
def function
|
51
|
+
print <<~SHELL
|
52
|
+
function spider() {
|
53
|
+
spidy spider #{definition_file.path} $1
|
54
|
+
}
|
55
|
+
function scraper() {
|
56
|
+
spidy call #{definition_file.path} $1
|
57
|
+
}
|
58
|
+
SHELL
|
59
|
+
end
|
60
|
+
|
61
|
+
def build(name)
|
62
|
+
build_shell(name)
|
63
|
+
build_ruby(name)
|
64
|
+
end
|
65
|
+
|
66
|
+
def build_shell(name)
|
67
|
+
File.open("#{name}.sh", 'w') do |f|
|
68
|
+
f.write <<~SHELL
|
69
|
+
#!/bin/bash
|
70
|
+
eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
|
71
|
+
spider example
|
72
|
+
SHELL
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def build_ruby(name)
|
77
|
+
File.open("#{name}.rb", 'w') do |f|
|
78
|
+
f.write <<~RUBY
|
79
|
+
# frozen_string_literal: true
|
80
|
+
|
81
|
+
Spidy.define do
|
82
|
+
spider(:example) do |yielder, connector|
|
83
|
+
# connector.call(url) do |resource|
|
84
|
+
# yielder.call(url or resource)
|
85
|
+
# end
|
86
|
+
end
|
87
|
+
|
88
|
+
define(:example) do
|
89
|
+
end
|
90
|
+
end
|
91
|
+
RUBY
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
data/lib/spidy/connector/html.rb
CHANGED
data/lib/spidy/console.rb
CHANGED
@@ -4,11 +4,10 @@
|
|
4
4
|
# spidy console
|
5
5
|
#
|
6
6
|
class Spidy::Console
|
7
|
-
|
8
|
-
delegate :
|
9
|
-
delegate :call, :each, to: :spidy
|
7
|
+
delegate :spidy, to: :@definition_file
|
8
|
+
delegate :call, :each, :namespace, allow_nil: true, to: :spidy
|
10
9
|
|
11
|
-
def initialize(definition_file
|
10
|
+
def initialize(definition_file)
|
12
11
|
@definition_file = definition_file
|
13
12
|
end
|
14
13
|
|
@@ -17,6 +16,6 @@ class Spidy::Console
|
|
17
16
|
end
|
18
17
|
|
19
18
|
def reload!
|
20
|
-
@definition_file
|
19
|
+
@definition_file.eval_definition
|
21
20
|
end
|
22
21
|
end
|
data/lib/spidy/definition.rb
CHANGED
@@ -36,17 +36,16 @@ module Spidy::Definition
|
|
36
36
|
@namespace ||= {}
|
37
37
|
connector = Spidy::Connector.get(connector || as)
|
38
38
|
binder = Spidy::Binder.get(self, binder || as)
|
39
|
-
@namespace[:"#{name}_scraper"] = define_proc(connector, binder, define_block)
|
39
|
+
@namespace[:"#{name}_scraper"] = define_proc(name, connector, binder, define_block)
|
40
40
|
end
|
41
41
|
|
42
42
|
private
|
43
43
|
|
44
|
-
def define_proc(connector, binder, define_block)
|
44
|
+
def define_proc(name, connector, binder, define_block)
|
45
45
|
proc do |source, &yielder|
|
46
|
-
|
47
|
-
|
46
|
+
yielder = lambda { |result| break result } if yielder.nil?
|
48
47
|
connection_yielder = lambda do |page|
|
49
|
-
binder.call(page, url: source, define: define_block) { |object| yielder.call(object) }
|
48
|
+
binder.call(page, url: source, define: define_block, define_name: name) { |object| yielder.call(object) }
|
50
49
|
end
|
51
50
|
connector.call(source, &connection_yielder)
|
52
51
|
end
|
data/lib/spidy/shell.rb
CHANGED
@@ -1,96 +1,20 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'pry'
|
4
|
-
|
5
3
|
#
|
6
|
-
# spidy
|
4
|
+
# spidy Shell
|
7
5
|
#
|
8
6
|
class Spidy::Shell
|
9
|
-
|
10
|
-
|
11
|
-
class_attribute :error_handler, default: (proc { |e, url| STDERR.puts({ url: url, message: e.message, backtrace: e.backtrace }.to_json) })
|
12
|
-
delegate :spidy, to: :definition_file
|
13
|
-
|
14
|
-
def initialize(definition_file)
|
15
|
-
@definition_file = definition_file
|
16
|
-
end
|
17
|
-
|
18
|
-
def each_stdin_lines(name)
|
19
|
-
STDIN.each_line do |url|
|
20
|
-
begin
|
21
|
-
spidy.each(url.strip, name: name, &output)
|
22
|
-
rescue => e
|
23
|
-
error_handler.call(e, url)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def call_stdin_lines(name)
|
29
|
-
STDIN.each_line do |url|
|
30
|
-
begin
|
31
|
-
spidy.call(url.strip, name: name, &output)
|
32
|
-
rescue => e
|
33
|
-
error_handler.call(e, url)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def call(name)
|
39
|
-
return call_stdin_lines(name) if FileTest.pipe?(STDIN)
|
40
|
-
spidy.call(name: name, &output) unless FileTest.pipe?(STDIN)
|
41
|
-
rescue => e
|
42
|
-
error_handler.call(e, nil)
|
7
|
+
def initialize(path)
|
8
|
+
@definition_file = Spidy::DefinitionFile.open(path)
|
43
9
|
end
|
44
10
|
|
45
|
-
def
|
46
|
-
|
47
|
-
spidy.each(name: name, &output)
|
48
|
-
rescue => e
|
49
|
-
error_handler.call(e, nil)
|
11
|
+
def interactive
|
12
|
+
Pry.start(Spidy::Console.new(@definition_file))
|
50
13
|
end
|
51
14
|
|
52
|
-
def
|
53
|
-
|
54
|
-
function spider() {
|
55
|
-
spidy spider #{definition_file.path} $1
|
56
|
-
}
|
57
|
-
function scraper() {
|
58
|
-
spidy call #{definition_file.path} $1
|
59
|
-
}
|
60
|
-
SHELL
|
15
|
+
def command_line
|
16
|
+
Spidy::CommandLine.new(@definition_file)
|
61
17
|
end
|
62
18
|
|
63
|
-
|
64
|
-
build_shell(name)
|
65
|
-
build_ruby(name)
|
66
|
-
end
|
67
|
-
|
68
|
-
def build_shell(name)
|
69
|
-
File.open("#{name}.sh", 'w') do |f|
|
70
|
-
f.write <<~SHELL
|
71
|
-
#!/bin/bash
|
72
|
-
eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
|
73
|
-
spider example
|
74
|
-
SHELL
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def build_ruby(name)
|
79
|
-
File.open("#{name}.rb", 'w') do |f|
|
80
|
-
f.write <<~RUBY
|
81
|
-
# frozen_string_literal: true
|
82
|
-
|
83
|
-
Spidy.define do
|
84
|
-
spider(:example) do |yielder, connector|
|
85
|
-
# connector.call(url) do |resource|
|
86
|
-
# yielder.call(url or resource)
|
87
|
-
# end
|
88
|
-
end
|
89
|
-
|
90
|
-
define(:example) do
|
91
|
-
end
|
92
|
-
end
|
93
|
-
RUBY
|
94
|
-
end
|
95
|
-
end
|
19
|
+
delegate :function, :each, :call, to: :command_line
|
96
20
|
end
|
data/lib/spidy/version.rb
CHANGED
data/spidy.gemspec
CHANGED
@@ -29,6 +29,7 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.add_development_dependency 'rake', '~> 10.0'
|
30
30
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
31
31
|
spec.add_development_dependency 'ffaker'
|
32
|
+
spec.add_development_dependency 'rspec-command'
|
32
33
|
|
33
34
|
spec.add_runtime_dependency 'activesupport'
|
34
35
|
spec.add_runtime_dependency 'mechanize'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec-command
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: activesupport
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -151,6 +165,7 @@ files:
|
|
151
165
|
- lib/spidy/binder/html.rb
|
152
166
|
- lib/spidy/binder/json.rb
|
153
167
|
- lib/spidy/binder/xml.rb
|
168
|
+
- lib/spidy/command_line.rb
|
154
169
|
- lib/spidy/connector.rb
|
155
170
|
- lib/spidy/connector/direct.rb
|
156
171
|
- lib/spidy/connector/html.rb
|
@@ -183,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
198
|
- !ruby/object:Gem::Version
|
184
199
|
version: '0'
|
185
200
|
requirements: []
|
186
|
-
rubygems_version: 3.
|
201
|
+
rubygems_version: 3.0.3
|
187
202
|
signing_key:
|
188
203
|
specification_version: 4
|
189
204
|
summary: web spider dsl
|