spidy 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +10 -1
- data/README.md +15 -0
- data/exe/spidy +4 -4
- data/lib/spidy.rb +6 -12
- data/lib/spidy/command_line.rb +94 -0
- data/lib/spidy/console.rb +4 -5
- data/lib/spidy/definition_file.rb +1 -1
- data/lib/spidy/interface.rb +12 -0
- data/lib/spidy/shell.rb +8 -84
- data/lib/spidy/version.rb +1 -1
- data/spidy.gemspec +1 -0
- metadata +19 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ba417984708d3441c47f69574850c379e4a26dbcf810734545449173e1b886b
|
4
|
+
data.tar.gz: '0881f6202ffbcc8c1a7e153e79b16d7dec68694dbb915430cc3c0a8912e29817'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 18ce16b8d0d5d3b536ccba74c842063c7c86d96fa9455010e58c78de20ed37e92051909a6054b77e7022c2c3072f9402fc246daf530c00287e22adbde2d598cf
|
7
|
+
data.tar.gz: e15ee2bec15d64dce143bad5eb0e59f177e32f4aeafed1626a288eaff690c028de7446e40a88bd16696b381ca08e912c959f5597eea11aeede78677ef3c9ed9e
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.6.5
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
spidy (0.1.
|
4
|
+
spidy (0.1.6)
|
5
5
|
activesupport
|
6
6
|
mechanize
|
7
7
|
pry
|
@@ -41,6 +41,7 @@ GEM
|
|
41
41
|
mime-types-data (3.2019.1009)
|
42
42
|
mini_portile2 (2.4.0)
|
43
43
|
minitest (5.14.0)
|
44
|
+
mixlib-shellout (2.4.4)
|
44
45
|
net-http-digest_auth (1.4.1)
|
45
46
|
net-http-persistent (3.1.0)
|
46
47
|
connection_pool (~> 2.2)
|
@@ -55,11 +56,18 @@ GEM
|
|
55
56
|
rspec-core (~> 3.8.0)
|
56
57
|
rspec-expectations (~> 3.8.0)
|
57
58
|
rspec-mocks (~> 3.8.0)
|
59
|
+
rspec-command (1.0.3)
|
60
|
+
mixlib-shellout (~> 2.0)
|
61
|
+
rspec (~> 3.2)
|
62
|
+
rspec-its (~> 1.2)
|
58
63
|
rspec-core (3.8.2)
|
59
64
|
rspec-support (~> 3.8.0)
|
60
65
|
rspec-expectations (3.8.4)
|
61
66
|
diff-lcs (>= 1.2.0, < 2.0)
|
62
67
|
rspec-support (~> 3.8.0)
|
68
|
+
rspec-its (1.3.0)
|
69
|
+
rspec-core (>= 3.0.0)
|
70
|
+
rspec-expectations (>= 3.0.0)
|
63
71
|
rspec-mocks (3.8.1)
|
64
72
|
diff-lcs (>= 1.2.0, < 2.0)
|
65
73
|
rspec-support (~> 3.8.0)
|
@@ -82,6 +90,7 @@ DEPENDENCIES
|
|
82
90
|
pry
|
83
91
|
rake (~> 10.0)
|
84
92
|
rspec (~> 3.0)
|
93
|
+
rspec-command
|
85
94
|
spidy!
|
86
95
|
|
87
96
|
BUNDLED WITH
|
data/README.md
CHANGED
@@ -44,6 +44,21 @@ cat urls | spidy call website.rb > website.json
|
|
44
44
|
echo 'http://example.com' | spidy each website.rb | spidy call website.rb | jq .
|
45
45
|
```
|
46
46
|
|
47
|
+
### When development console
|
48
|
+
```bash
|
49
|
+
spidy console website.rb
|
50
|
+
```
|
51
|
+
|
52
|
+
### reload source code
|
53
|
+
```
|
54
|
+
pry(#<Spidy::Console>)> reload!
|
55
|
+
```
|
56
|
+
|
57
|
+
```rb
|
58
|
+
each('http://example.com') { |url| break url }
|
59
|
+
call('http://example.com') { |html| break html } # html as nokogiri object ( mechanize )
|
60
|
+
```
|
61
|
+
|
47
62
|
### When used from the ruby code
|
48
63
|
``
|
49
64
|
a = Spidy.define do
|
data/exe/spidy
CHANGED
@@ -7,17 +7,17 @@ require 'pry'
|
|
7
7
|
if ARGV[1].blank?
|
8
8
|
case ARGV[0]
|
9
9
|
when 'version' then STDOUT.puts(Spidy::VERSION)
|
10
|
-
when 'console' then Spidy.
|
10
|
+
when 'console' then Spidy.shell.interactive
|
11
11
|
else
|
12
12
|
STDOUT.puts 'usage: spidy [version console]'
|
13
13
|
end
|
14
14
|
else
|
15
15
|
case ARGV[0]
|
16
|
-
when 'console' then Spidy.
|
17
|
-
when '
|
16
|
+
when 'console' then Spidy.shell(ARGV[1]).interactive
|
17
|
+
when 'function' then Spidy.shell(ARGV[1]).function
|
18
18
|
when 'call' then Spidy.shell(ARGV[1]).call(ARGV[2])
|
19
19
|
when 'each' then Spidy.shell(ARGV[1]).each(ARGV[2])
|
20
20
|
else
|
21
|
-
STDOUT.puts 'usage: spidy [call each
|
21
|
+
STDOUT.puts 'usage: spidy [console function call each] [file]'
|
22
22
|
end
|
23
23
|
end
|
data/lib/spidy.rb
CHANGED
@@ -10,36 +10,30 @@ require 'open-uri'
|
|
10
10
|
#
|
11
11
|
module Spidy
|
12
12
|
extend ActiveSupport::Autoload
|
13
|
+
autoload :Interface
|
13
14
|
autoload :Shell
|
15
|
+
autoload :CommandLine
|
14
16
|
autoload :Console
|
15
17
|
autoload :Definition
|
16
18
|
autoload :DefinitionFile
|
17
19
|
autoload :Binder
|
18
20
|
autoload :Connector
|
19
21
|
|
20
|
-
def self.
|
21
|
-
|
22
|
-
if filepath
|
23
|
-
Pry.start(Spidy::Console.new(Spidy::DefinitionFile.open(filepath)))
|
24
|
-
else
|
25
|
-
Pry.start(Spidy::Console.new)
|
26
|
-
end
|
22
|
+
def self.shell(filepath = nil)
|
23
|
+
Spidy::Shell.new(filepath)
|
27
24
|
end
|
28
25
|
|
29
26
|
def self.open(filepath)
|
30
27
|
Spidy::DefinitionFile.open(filepath).spidy
|
31
28
|
end
|
32
29
|
|
33
|
-
def self.shell(filepath)
|
34
|
-
Spidy::Shell.new(Spidy::DefinitionFile.open(filepath))
|
35
|
-
end
|
36
|
-
|
37
30
|
def self.define(&block)
|
38
|
-
Module.new do
|
31
|
+
spidy = Module.new do
|
39
32
|
class_eval do
|
40
33
|
extend ::Spidy::Definition
|
41
34
|
module_eval(&block)
|
42
35
|
end
|
43
36
|
end
|
37
|
+
Spidy::Interface.new(spidy)
|
44
38
|
end
|
45
39
|
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# spidy shell interface
|
5
|
+
#
|
6
|
+
class Spidy::CommandLine
|
7
|
+
delegate :spidy, to: :@definition_file
|
8
|
+
class_attribute :output, default: (proc { |result| STDOUT.puts(result.to_s) })
|
9
|
+
class_attribute :error_handler, default: (proc { |e, url| STDERR.puts({ url: url, message: e.message, backtrace: e.backtrace }.to_json) })
|
10
|
+
|
11
|
+
def initialize(definition_file)
|
12
|
+
@definition_file = definition_file
|
13
|
+
raise 'unloaded spidy' if definition_file.spidy.nil?
|
14
|
+
end
|
15
|
+
|
16
|
+
def each_stdin_lines(name)
|
17
|
+
STDIN.each_line do |url|
|
18
|
+
begin
|
19
|
+
spidy.each(url.strip, name: name, &output)
|
20
|
+
rescue => e
|
21
|
+
error_handler.call(e, url)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def call_stdin_lines(name)
|
27
|
+
STDIN.each_line do |url|
|
28
|
+
begin
|
29
|
+
spidy.call(url.strip, name: name, &output)
|
30
|
+
rescue => e
|
31
|
+
error_handler.call(e, url)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def call(name)
|
37
|
+
return call_stdin_lines(name) if FileTest.pipe?(STDIN)
|
38
|
+
spidy.call(name: name, &output) unless FileTest.pipe?(STDIN)
|
39
|
+
rescue => e
|
40
|
+
error_handler.call(e, nil)
|
41
|
+
end
|
42
|
+
|
43
|
+
def each(name)
|
44
|
+
return each_stdin_lines(name) if FileTest.pipe?(STDIN)
|
45
|
+
spidy.each(name: name, &output)
|
46
|
+
rescue => e
|
47
|
+
error_handler.call(e, nil)
|
48
|
+
end
|
49
|
+
|
50
|
+
def function
|
51
|
+
print <<~SHELL
|
52
|
+
function spider() {
|
53
|
+
spidy spider #{definition_file.path} $1
|
54
|
+
}
|
55
|
+
function scraper() {
|
56
|
+
spidy call #{definition_file.path} $1
|
57
|
+
}
|
58
|
+
SHELL
|
59
|
+
end
|
60
|
+
|
61
|
+
def build(name)
|
62
|
+
build_shell(name)
|
63
|
+
build_ruby(name)
|
64
|
+
end
|
65
|
+
|
66
|
+
def build_shell(name)
|
67
|
+
File.open("#{name}.sh", 'w') do |f|
|
68
|
+
f.write <<~SHELL
|
69
|
+
#!/bin/bash
|
70
|
+
eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
|
71
|
+
spider example
|
72
|
+
SHELL
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def build_ruby(name)
|
77
|
+
File.open("#{name}.rb", 'w') do |f|
|
78
|
+
f.write <<~RUBY
|
79
|
+
# frozen_string_literal: true
|
80
|
+
|
81
|
+
Spidy.define do
|
82
|
+
spider(:example) do |yielder, connector|
|
83
|
+
# connector.call(url) do |resource|
|
84
|
+
# yielder.call(url or resource)
|
85
|
+
# end
|
86
|
+
end
|
87
|
+
|
88
|
+
define(:example) do
|
89
|
+
end
|
90
|
+
end
|
91
|
+
RUBY
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
data/lib/spidy/console.rb
CHANGED
@@ -4,11 +4,10 @@
|
|
4
4
|
# spidy console
|
5
5
|
#
|
6
6
|
class Spidy::Console
|
7
|
-
|
8
|
-
delegate :
|
9
|
-
delegate :call, :each, to: :spidy
|
7
|
+
delegate :spidy, to: :@definition_file
|
8
|
+
delegate :call, :each, :namespace, allow_nil: true, to: :spidy
|
10
9
|
|
11
|
-
def initialize(definition_file
|
10
|
+
def initialize(definition_file)
|
12
11
|
@definition_file = definition_file
|
13
12
|
end
|
14
13
|
|
@@ -17,6 +16,6 @@ class Spidy::Console
|
|
17
16
|
end
|
18
17
|
|
19
18
|
def reload!
|
20
|
-
@definition_file
|
19
|
+
@definition_file.eval_definition
|
21
20
|
end
|
22
21
|
end
|
data/lib/spidy/shell.rb
CHANGED
@@ -1,96 +1,20 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'pry'
|
4
|
-
|
5
3
|
#
|
6
|
-
# spidy
|
4
|
+
# spidy Shell
|
7
5
|
#
|
8
6
|
class Spidy::Shell
|
9
|
-
|
10
|
-
|
11
|
-
class_attribute :error_handler, default: (proc { |e, url| STDERR.puts({ url: url, message: e.message, backtrace: e.backtrace }.to_json) })
|
12
|
-
delegate :spidy, to: :definition_file
|
13
|
-
|
14
|
-
def initialize(definition_file)
|
15
|
-
@definition_file = definition_file
|
16
|
-
end
|
17
|
-
|
18
|
-
def each_stdin_lines(name)
|
19
|
-
STDIN.each_line do |url|
|
20
|
-
begin
|
21
|
-
spidy.each(url.strip, name: name, &output)
|
22
|
-
rescue => e
|
23
|
-
error_handler.call(e, url)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def call_stdin_lines(name)
|
29
|
-
STDIN.each_line do |url|
|
30
|
-
begin
|
31
|
-
spidy.call(url.strip, name: name, &output)
|
32
|
-
rescue => e
|
33
|
-
error_handler.call(e, url)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def call(name)
|
39
|
-
return call_stdin_lines(name) if FileTest.pipe?(STDIN)
|
40
|
-
spidy.call(name: name, &output) unless FileTest.pipe?(STDIN)
|
41
|
-
rescue => e
|
42
|
-
error_handler.call(e, nil)
|
7
|
+
def initialize(path)
|
8
|
+
@definition_file = Spidy::DefinitionFile.open(path)
|
43
9
|
end
|
44
10
|
|
45
|
-
def
|
46
|
-
|
47
|
-
spidy.each(name: name, &output)
|
48
|
-
rescue => e
|
49
|
-
error_handler.call(e, nil)
|
11
|
+
def interactive
|
12
|
+
Pry.start(Spidy::Console.new(@definition_file))
|
50
13
|
end
|
51
14
|
|
52
|
-
def
|
53
|
-
|
54
|
-
function spider() {
|
55
|
-
spidy spider #{definition_file.path} $1
|
56
|
-
}
|
57
|
-
function scraper() {
|
58
|
-
spidy call #{definition_file.path} $1
|
59
|
-
}
|
60
|
-
SHELL
|
15
|
+
def command_line
|
16
|
+
Spidy::CommandLine.new(@definition_file)
|
61
17
|
end
|
62
18
|
|
63
|
-
|
64
|
-
build_shell(name)
|
65
|
-
build_ruby(name)
|
66
|
-
end
|
67
|
-
|
68
|
-
def build_shell(name)
|
69
|
-
File.open("#{name}.sh", 'w') do |f|
|
70
|
-
f.write <<~SHELL
|
71
|
-
#!/bin/bash
|
72
|
-
eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
|
73
|
-
spider example
|
74
|
-
SHELL
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def build_ruby(name)
|
79
|
-
File.open("#{name}.rb", 'w') do |f|
|
80
|
-
f.write <<~RUBY
|
81
|
-
# frozen_string_literal: true
|
82
|
-
|
83
|
-
Spidy.define do
|
84
|
-
spider(:example) do |yielder, connector|
|
85
|
-
# connector.call(url) do |resource|
|
86
|
-
# yielder.call(url or resource)
|
87
|
-
# end
|
88
|
-
end
|
89
|
-
|
90
|
-
define(:example) do
|
91
|
-
end
|
92
|
-
end
|
93
|
-
RUBY
|
94
|
-
end
|
95
|
-
end
|
19
|
+
delegate :function, :each, :call, to: :command_line
|
96
20
|
end
|
data/lib/spidy/version.rb
CHANGED
data/spidy.gemspec
CHANGED
@@ -29,6 +29,7 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.add_development_dependency 'rake', '~> 10.0'
|
30
30
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
31
31
|
spec.add_development_dependency 'ffaker'
|
32
|
+
spec.add_development_dependency 'rspec-command'
|
32
33
|
|
33
34
|
spec.add_runtime_dependency 'activesupport'
|
34
35
|
spec.add_runtime_dependency 'mechanize'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-02-
|
11
|
+
date: 2020-02-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec-command
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: activesupport
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -151,6 +165,7 @@ files:
|
|
151
165
|
- lib/spidy/binder/html.rb
|
152
166
|
- lib/spidy/binder/json.rb
|
153
167
|
- lib/spidy/binder/xml.rb
|
168
|
+
- lib/spidy/command_line.rb
|
154
169
|
- lib/spidy/connector.rb
|
155
170
|
- lib/spidy/connector/direct.rb
|
156
171
|
- lib/spidy/connector/html.rb
|
@@ -159,6 +174,7 @@ files:
|
|
159
174
|
- lib/spidy/console.rb
|
160
175
|
- lib/spidy/definition.rb
|
161
176
|
- lib/spidy/definition_file.rb
|
177
|
+
- lib/spidy/interface.rb
|
162
178
|
- lib/spidy/shell.rb
|
163
179
|
- lib/spidy/spider.rb
|
164
180
|
- lib/spidy/version.rb
|
@@ -183,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
199
|
- !ruby/object:Gem::Version
|
184
200
|
version: '0'
|
185
201
|
requirements: []
|
186
|
-
rubygems_version: 3.
|
202
|
+
rubygems_version: 3.0.3
|
187
203
|
signing_key:
|
188
204
|
specification_version: 4
|
189
205
|
summary: web spider dsl
|