spidy 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -0
- data/README.md +3 -5
- data/exe/spidy +12 -6
- data/lib/spidy.rb +2 -15
- data/lib/spidy/binder.rb +7 -68
- data/lib/spidy/binder/html.rb +43 -0
- data/lib/spidy/binder/json.rb +41 -0
- data/lib/spidy/connector.rb +7 -1
- data/lib/spidy/connector/html.rb +7 -24
- data/lib/spidy/connector/json.rb +10 -0
- data/lib/spidy/console.rb +9 -1
- data/lib/spidy/definition.rb +13 -85
- data/lib/spidy/definition_file.rb +1 -1
- data/lib/spidy/shell.rb +31 -34
- data/lib/spidy/version.rb +1 -1
- data/spidy.png +0 -0
- metadata +6 -4
- data/lib/spidy/looper.rb +0 -22
- data/lib/spidy/result.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ee6041bae6fd932f5b19a52890766192fd59e7ab2be070390e3a8ac16b511cf
|
4
|
+
data.tar.gz: 49d00e22394f0d83d3b49aa8926710ee23c2bffae1c14ce469f3227b338b9cad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 82dd5bb591c47412c0648afe1a5ac7b971c8f1ccbf01708e3556d6edd2ca2a8d514ee3160cbf5cdc550c77d8e10b470eb6b70ff493f30d0b5458384cb18379a7
|
7
|
+
data.tar.gz: 798f5ef00e24d12d14058bc6ad87a54d3a9d67f9d091ee64f1450ceed1008bee027d4a396d2d8cf449bacb895220384b2ddbf141ccfbe7fb8b9e6a5290183d74
|
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# Spidy
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
![logo](https://github.com/aileron-inc/spidy/raw/master/spidy.png)
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -32,7 +30,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
32
30
|
|
33
31
|
## Contributing
|
34
32
|
|
35
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
33
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/aileron-inc/spidy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
36
34
|
|
37
35
|
## License
|
38
36
|
|
@@ -40,4 +38,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
40
38
|
|
41
39
|
## Code of Conduct
|
42
40
|
|
43
|
-
Everyone interacting in the Crawler project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
41
|
+
Everyone interacting in the Crawler project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/aileron-inc/spidy/blob/master/CODE_OF_CONDUCT.md).
|
data/exe/spidy
CHANGED
@@ -3,15 +3,21 @@
|
|
3
3
|
|
4
4
|
require 'spidy'
|
5
5
|
|
6
|
-
|
7
|
-
when :spider then Spidy.open(ARGV[1]).shell.spider(ARGV[2])
|
8
|
-
when :scraper then Spidy.open(ARGV[1]).shell.scraper(ARGV[2])
|
9
|
-
when :shell then Spidy.open(ARGV[1]).shell.function
|
10
|
-
when :new then Spidy.open(ARGV[1]).shell.build
|
11
|
-
when :console
|
6
|
+
if ARGV[0]&.to_sym == :console
|
12
7
|
if ARGV[1].blank?
|
13
8
|
Spidy.console
|
14
9
|
else
|
15
10
|
Spidy.open(ARGV[1]).console
|
16
11
|
end
|
12
|
+
return
|
13
|
+
end
|
14
|
+
|
15
|
+
shell = Spidy.open(ARGV[1]).shell
|
16
|
+
|
17
|
+
case ARGV[0]&.to_sym
|
18
|
+
when :shell then shell.function
|
19
|
+
when :call then shell.call(ARGV[2])
|
20
|
+
when :each then shell.each(ARGV[2])
|
21
|
+
else
|
22
|
+
fail 'usage: spidy [call shell new console] [file]'
|
17
23
|
end
|
data/lib/spidy.rb
CHANGED
@@ -17,12 +17,7 @@ module Spidy
|
|
17
17
|
autoload :Definition
|
18
18
|
autoload :DefinitionFile
|
19
19
|
autoload :Binder
|
20
|
-
autoload :Spider
|
21
|
-
autoload :Looper
|
22
20
|
autoload :Connector
|
23
|
-
autoload :Result
|
24
|
-
|
25
|
-
const_set(:Crawler, Module.new) unless const_defined?(:Crawler)
|
26
21
|
|
27
22
|
def self.console
|
28
23
|
require 'pry'
|
@@ -33,15 +28,7 @@ module Spidy
|
|
33
28
|
::Spidy::DefinitionFile.open(filepath)
|
34
29
|
end
|
35
30
|
|
36
|
-
def self.define(
|
37
|
-
|
38
|
-
crawler_definition.domain = domain
|
39
|
-
|
40
|
-
if name
|
41
|
-
crawler_class_name = name.to_s.camelize
|
42
|
-
Crawler.class_eval { remove_const(crawler_class_name) } if Crawler.const_defined?(crawler_class_name)
|
43
|
-
Crawler.const_set(crawler_class_name, crawler_definition)
|
44
|
-
end
|
45
|
-
crawler_definition
|
31
|
+
def self.define(&block)
|
32
|
+
Class.new(::Spidy::Definition, &block)
|
46
33
|
end
|
47
34
|
end
|
data/lib/spidy/binder.rb
CHANGED
@@ -3,75 +3,14 @@
|
|
3
3
|
#
|
4
4
|
# Bind resource received from the connection to the result object
|
5
5
|
#
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
class Multiple
|
11
|
-
def self.bind(connector:, binder:, query:, block:)
|
12
|
-
multiple_binding_class = self
|
13
|
-
connector.field.call(binder, query) do |elements|
|
14
|
-
multiple_binding_class.new(binder.class).instance_exec(elements, &block)
|
15
|
-
end
|
16
|
-
end
|
6
|
+
module Spidy::Binder
|
7
|
+
extend ActiveSupport::Autoload
|
8
|
+
autoload :Json
|
9
|
+
autoload :Html
|
17
10
|
|
18
|
-
|
19
|
-
|
20
|
-
end
|
11
|
+
def self.get(name)
|
12
|
+
return unless name.is_a?(String) || name.is_a?(Symbol)
|
21
13
|
|
22
|
-
|
23
|
-
@binder.field_names << name
|
24
|
-
@binder.field_names.uniq!
|
25
|
-
@binder.result_class.define(name)
|
26
|
-
result = yield
|
27
|
-
@binder.define_method(name) { result }
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
class_attribute :field_names, default: []
|
32
|
-
attr_reader :resource
|
33
|
-
|
34
|
-
def initialize(resource)
|
35
|
-
@resource = resource
|
36
|
-
self.class.fields_call(self)
|
37
|
-
end
|
38
|
-
|
39
|
-
def result
|
40
|
-
definition = self
|
41
|
-
fetched_at = Time.current
|
42
|
-
result = self.class.result_class.new(fetched_at: fetched_at, fetched_on: fetched_at.beginning_of_day, **attributes)
|
43
|
-
result.define_singleton_method(:resource) { definition.resource }
|
44
|
-
result
|
45
|
-
end
|
46
|
-
|
47
|
-
def attributes_to_array
|
48
|
-
field_names.map { |field_name| send(field_name) }
|
49
|
-
end
|
50
|
-
|
51
|
-
def attributes
|
52
|
-
field_names.map { |field_name| [field_name, send(field_name)] }.to_h
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.query(name, query = nil, &block)
|
56
|
-
define_method(name) do
|
57
|
-
connector.field.call(self, query, &block)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
def self.field(name, query = nil, optional: false, &block)
|
62
|
-
field_names << name
|
63
|
-
field_names.uniq!
|
64
|
-
result_class.define(name, presence: !optional)
|
65
|
-
define_method(name) do
|
66
|
-
connector.field.call(self, query, &block)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.fields(query, &block)
|
71
|
-
@fields = { query: query, block: block }
|
72
|
-
end
|
73
|
-
|
74
|
-
def self.fields_call(binder)
|
75
|
-
Multiple.bind(connector: connector, binder: binder, query: @fields[:query], block: @fields[:block]) if @fields
|
14
|
+
const_get(name.to_s.classify)
|
76
15
|
end
|
77
16
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Bind html and convert to object
|
5
|
+
#
|
6
|
+
module Spidy::Binder::Html
|
7
|
+
#
|
8
|
+
# Describe the definition to get the necessary elements from the resource object
|
9
|
+
#
|
10
|
+
class Resource
|
11
|
+
class_attribute :names, default: []
|
12
|
+
attr_reader :html
|
13
|
+
|
14
|
+
def initialize(html)
|
15
|
+
@html = html
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_s
|
19
|
+
to_h.to_json
|
20
|
+
end
|
21
|
+
|
22
|
+
def to_h
|
23
|
+
names.map { |name| [name, send(name)] }.to_h
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.let(name, query = nil, &block)
|
27
|
+
names << name
|
28
|
+
define_method(name) do
|
29
|
+
return html.at(query)&.text if block.nil?
|
30
|
+
return instance_exec(&block) if query.blank?
|
31
|
+
|
32
|
+
instance_exec(html.search(query), &block)
|
33
|
+
rescue NoMethodError => e
|
34
|
+
raise "#{html.uri} ##{name} => #{e.message}"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.call(html, define_block)
|
40
|
+
binder = Class.new(Resource) { instance_exec(&define_block) }
|
41
|
+
yield binder.new(html)
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Bind json and convert to object
|
5
|
+
#
|
6
|
+
module Spidy::Binder::Json
|
7
|
+
#
|
8
|
+
# Describe the definition to get the necessary elements from the resource object
|
9
|
+
#
|
10
|
+
class Resource
|
11
|
+
class_attribute :names, default: []
|
12
|
+
attr_reader :resource
|
13
|
+
|
14
|
+
def initialize(resource)
|
15
|
+
@resource = resource
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_s
|
19
|
+
to_h.to_json
|
20
|
+
end
|
21
|
+
|
22
|
+
def to_h
|
23
|
+
names.map { |name| [name, send(name)] }.to_h
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.let(name, *query, &block)
|
27
|
+
names << name
|
28
|
+
define_method(name) do
|
29
|
+
result = resource.dig(*query) if query.present?
|
30
|
+
return result if block.nil?
|
31
|
+
|
32
|
+
instance_exec(result, &block)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.call(resource, define_block)
|
38
|
+
binder = Class.new(Resource) { instance_exec(&define_block) }
|
39
|
+
yield binder.new(resource)
|
40
|
+
end
|
41
|
+
end
|
data/lib/spidy/connector.rb
CHANGED
data/lib/spidy/connector/html.rb
CHANGED
@@ -3,15 +3,7 @@
|
|
3
3
|
#
|
4
4
|
# Mechanize wrapper
|
5
5
|
#
|
6
|
-
|
7
|
-
class_attribute :field, default: (lambda { |object, query, &block|
|
8
|
-
node = object.resource.search(query)
|
9
|
-
fail "Could not be located #{query}" if node.nil?
|
10
|
-
return node.first.text if block.nil?
|
11
|
-
|
12
|
-
object.instance_exec(node, &block)
|
13
|
-
})
|
14
|
-
|
6
|
+
module Spidy::Connector::Html
|
15
7
|
USER_AGENT = [
|
16
8
|
'Mozilla/5.0',
|
17
9
|
'(Macintosh; Intel Mac OS X 10_12_6)',
|
@@ -21,22 +13,13 @@ class Spidy::Connector::Html
|
|
21
13
|
'Safari/537.36'
|
22
14
|
].join(' ')
|
23
15
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
def initialize(start_url: nil, encoding: nil)
|
28
|
-
@start_url = start_url
|
29
|
-
@agent = Mechanize.new
|
16
|
+
def self.call(url, encoding: nil, &yielder)
|
17
|
+
agent = Mechanize.new
|
30
18
|
if encoding
|
31
|
-
|
32
|
-
|
19
|
+
agent.default_encoding = encoding
|
20
|
+
agent.force_default_encoding = true
|
33
21
|
end
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
def call(url = @start_url, &block)
|
38
|
-
fail 'URL is undefined' if url.blank?
|
39
|
-
|
40
|
-
agent.get(url, &block)
|
22
|
+
agent.user_agent = USER_AGENT
|
23
|
+
agent.get(url, &yielder)
|
41
24
|
end
|
42
25
|
end
|
data/lib/spidy/console.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
#
|
6
6
|
class Spidy::Console
|
7
7
|
attr_reader :definition_file
|
8
|
-
delegate :
|
8
|
+
delegate :namespace, :spiders, to: :definition_file
|
9
9
|
|
10
10
|
def initialize(definition_file = nil)
|
11
11
|
@definition_file = definition_file
|
@@ -18,4 +18,12 @@ class Spidy::Console
|
|
18
18
|
def reload!
|
19
19
|
@definition_file&.eval_definition
|
20
20
|
end
|
21
|
+
|
22
|
+
def call(name, url = nil, &block)
|
23
|
+
namespace[name].call(url, &block)
|
24
|
+
end
|
25
|
+
|
26
|
+
def each(name, url = nil, &block)
|
27
|
+
spiders[name].call(url, &block)
|
28
|
+
end
|
21
29
|
end
|
data/lib/spidy/definition.rb
CHANGED
@@ -4,100 +4,28 @@
|
|
4
4
|
# Class representing a website defined by DSL
|
5
5
|
#
|
6
6
|
class Spidy::Definition
|
7
|
-
class_attribute :
|
7
|
+
class_attribute :namespace, default: {}
|
8
8
|
class_attribute :spiders, default: {}
|
9
|
-
class_attribute :scrapers, default: {}
|
10
|
-
class_attribute :output, default: ->(result) { STDOUT.puts(result.attributes.to_json) }
|
11
9
|
|
12
|
-
def output(&block)
|
13
|
-
self.output = block
|
14
|
-
end
|
15
|
-
|
16
|
-
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
17
10
|
class << self
|
18
|
-
def
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
connector.call(url) do |resource|
|
26
|
-
spider.call(resource, &spider_block)
|
11
|
+
def define(name, connector: nil, binder: nil, as: nil, &define_block)
|
12
|
+
connector = Spidy::Connector.get(as || connector) || connector
|
13
|
+
binder = Spidy::Binder.get(as || binder) || binder
|
14
|
+
namespace[name] = proc do |url, &yielder|
|
15
|
+
connection_yielder = lambda do |resource|
|
16
|
+
binder.call(resource, define_block) do |object|
|
17
|
+
yielder.call(object)
|
27
18
|
end
|
28
19
|
end
|
29
|
-
|
30
|
-
const_set("#{name}_spider".classify, spider_class)
|
31
|
-
spiders[name] = spider_class
|
32
|
-
end
|
33
|
-
|
34
|
-
def scraper(name, options, &block)
|
35
|
-
if options[:loop]
|
36
|
-
loop_scraper(name, options, &block)
|
37
|
-
else
|
38
|
-
normal_scraper(name, **options, &block)
|
20
|
+
connector.call(url, &connection_yielder)
|
39
21
|
end
|
40
22
|
end
|
41
23
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
result_class = Class.new(Spidy::Result)
|
47
|
-
|
48
|
-
# connector
|
49
|
-
connector_class = Spidy::Connector.const_get(options[:as].to_s.classify)
|
50
|
-
connector = connector_class.new(encoding: options[:encoding])
|
51
|
-
|
52
|
-
namespace = Class.new do
|
53
|
-
binder = Class.new(Spidy::Binder) do
|
54
|
-
define_singleton_method(:connector) { connector }
|
55
|
-
define_singleton_method(:result_class) { result_class }
|
56
|
-
define_method(:connector) { connector }
|
57
|
-
instance_exec(&block)
|
58
|
-
end
|
59
|
-
define_singleton_method(:call) do |url = options[:start_url], &yielder|
|
60
|
-
connector.call(url) do |resource|
|
61
|
-
looper = Spidy::Looper.new(resource, binder, options[:loop])
|
62
|
-
looper.call(&yielder)
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
66
|
-
const_set("#{name}_scraper".classify, namespace)
|
67
|
-
scrapers[name] = namespace
|
68
|
-
end
|
69
|
-
|
70
|
-
def normal_scraper(name, encoding: nil, as: :html, &block)
|
71
|
-
# result
|
72
|
-
result_class = Class.new(Spidy::Result)
|
73
|
-
|
74
|
-
# connector
|
75
|
-
connector_class = Spidy::Connector.const_get(as.to_s.classify)
|
76
|
-
connector = connector_class.new(encoding: encoding)
|
77
|
-
|
78
|
-
# namespace
|
79
|
-
namespace = Class.new do
|
80
|
-
binder = Class.new(Spidy::Binder) do
|
81
|
-
define_singleton_method(:connector) { connector }
|
82
|
-
define_singleton_method(:result_class) { result_class }
|
83
|
-
define_method(:connector) { connector }
|
84
|
-
instance_exec(&block)
|
85
|
-
end
|
86
|
-
define_singleton_method(:bind) do |url|
|
87
|
-
connector.call(url) do |resource|
|
88
|
-
binder.new(resource)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
define_singleton_method(:call) do |url, &output|
|
92
|
-
result = bind(url).result
|
93
|
-
fail "#{url}\n#{result.errors.full_messages}" if result.invalid?
|
94
|
-
|
95
|
-
output.call(result)
|
96
|
-
end
|
24
|
+
def spider(name, connector: nil, as: nil)
|
25
|
+
connector = Spidy::Connector.get(as || connector) || connector
|
26
|
+
spiders[name] = proc do |url, &yielder|
|
27
|
+
yield(yielder, connector, url)
|
97
28
|
end
|
98
|
-
const_set("#{name}_scraper".classify, namespace)
|
99
|
-
scrapers[name] = namespace
|
100
29
|
end
|
101
30
|
end
|
102
|
-
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
103
31
|
end
|
data/lib/spidy/shell.rb
CHANGED
@@ -1,51 +1,25 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'pry'
|
4
|
+
|
3
5
|
#
|
4
6
|
# spidy shell interface
|
5
7
|
#
|
6
8
|
class Spidy::Shell
|
7
9
|
attr_reader :definition_file
|
8
|
-
delegate :
|
10
|
+
delegate :namespace, :spiders, to: :definition_file
|
9
11
|
|
10
12
|
def initialize(definition_file)
|
11
13
|
@definition_file = definition_file
|
12
14
|
end
|
13
15
|
|
14
|
-
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
15
|
-
def scraper(name)
|
16
|
-
command = scrapers[name.to_sym]
|
17
|
-
fail "undefined commmand[#{name}]" if command.nil?
|
18
|
-
|
19
|
-
while line = STDIN.gets
|
20
|
-
url = line.strip
|
21
|
-
begin
|
22
|
-
command.call(url, &definition_file.output)
|
23
|
-
rescue => e
|
24
|
-
STDERR.puts "ERROR #{url}: #{e}\n#{e.backtrace}"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
# rubocop:enable Lint/AssignmentInCondition, Style/RescueStandardError
|
29
|
-
|
30
|
-
def spider(name)
|
31
|
-
command = spiders[name.to_sym]
|
32
|
-
if File.pipe?(STDIN)
|
33
|
-
STDIN.each_line do |line|
|
34
|
-
start_url = line.strip
|
35
|
-
command.call(start_url) { |url| puts url }
|
36
|
-
end
|
37
|
-
else
|
38
|
-
command.call { |url| puts url }
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
16
|
def function
|
43
17
|
print <<~SHELL
|
44
18
|
function spider() {
|
45
19
|
spidy spider #{definition_file.path} $1
|
46
20
|
}
|
47
21
|
function scraper() {
|
48
|
-
spidy
|
22
|
+
spidy call #{definition_file.path} $1
|
49
23
|
}
|
50
24
|
SHELL
|
51
25
|
end
|
@@ -56,12 +30,14 @@ class Spidy::Shell
|
|
56
30
|
f.write <<~RUBY
|
57
31
|
# frozen_string_literal: true
|
58
32
|
|
59
|
-
Spidy.define
|
60
|
-
spider(:example
|
61
|
-
#
|
33
|
+
Spidy.define do
|
34
|
+
spider(:example) do |yielder, connector|
|
35
|
+
# connector.call(url) do |resource|
|
36
|
+
# yielder.call(url or resource)
|
37
|
+
# end
|
62
38
|
end
|
63
39
|
|
64
|
-
|
40
|
+
define(:example) do
|
65
41
|
end
|
66
42
|
end
|
67
43
|
RUBY
|
@@ -76,4 +52,25 @@ class Spidy::Shell
|
|
76
52
|
end
|
77
53
|
end
|
78
54
|
# rubocop:enable Metrics/MethodLength
|
55
|
+
|
56
|
+
def call(name)
|
57
|
+
exec(namespace[name&.to_sym] || namespace.values.first)
|
58
|
+
end
|
59
|
+
|
60
|
+
def each(name)
|
61
|
+
exec(spiders[name&.to_sym] || spiders.values.first)
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
def exec(command)
|
67
|
+
fail "undefined commmand[#{name}]" if command.nil?
|
68
|
+
|
69
|
+
yielder = proc { |result| STDOUT.puts(result.to_s) }
|
70
|
+
if FileTest.pipe?(STDIN)
|
71
|
+
STDIN.each { |line| command.call(line.strip, &yielder) }
|
72
|
+
else
|
73
|
+
command.call(&yielder)
|
74
|
+
end
|
75
|
+
end
|
79
76
|
end
|
data/lib/spidy/version.rb
CHANGED
data/spidy.png
ADDED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -148,18 +148,20 @@ files:
|
|
148
148
|
- exe/spidy
|
149
149
|
- lib/spidy.rb
|
150
150
|
- lib/spidy/binder.rb
|
151
|
+
- lib/spidy/binder/html.rb
|
152
|
+
- lib/spidy/binder/json.rb
|
151
153
|
- lib/spidy/connector.rb
|
152
154
|
- lib/spidy/connector/html.rb
|
155
|
+
- lib/spidy/connector/json.rb
|
153
156
|
- lib/spidy/connector/xml.rb
|
154
157
|
- lib/spidy/console.rb
|
155
158
|
- lib/spidy/definition.rb
|
156
159
|
- lib/spidy/definition_file.rb
|
157
|
-
- lib/spidy/looper.rb
|
158
|
-
- lib/spidy/result.rb
|
159
160
|
- lib/spidy/shell.rb
|
160
161
|
- lib/spidy/spider.rb
|
161
162
|
- lib/spidy/version.rb
|
162
163
|
- spidy.gemspec
|
164
|
+
- spidy.png
|
163
165
|
homepage: https://github.com/aileron-inc/spidy
|
164
166
|
licenses:
|
165
167
|
- MIT
|
data/lib/spidy/looper.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
#
|
4
|
-
# looper
|
5
|
-
#
|
6
|
-
class Spidy::Looper
|
7
|
-
def initialize(resource, binder, loop_block)
|
8
|
-
@resource = resource
|
9
|
-
@binder = binder
|
10
|
-
@loop_block = loop_block
|
11
|
-
end
|
12
|
-
|
13
|
-
def call
|
14
|
-
yielder = lambda do |element|
|
15
|
-
result = @binder.new(element).result
|
16
|
-
fail "#{element}\n\n#{result.errors.full_messages}" if result.invalid?
|
17
|
-
|
18
|
-
yield result
|
19
|
-
end
|
20
|
-
@loop_block.call(@resource, yielder)
|
21
|
-
end
|
22
|
-
end
|
data/lib/spidy/result.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
#
|
4
|
-
# Scrape results
|
5
|
-
#
|
6
|
-
class Spidy::Result
|
7
|
-
include ActiveModel::Model
|
8
|
-
include ActiveModel::Attributes
|
9
|
-
|
10
|
-
def self.define(name, presence: true)
|
11
|
-
case name
|
12
|
-
when /.*\?/
|
13
|
-
attribute name, :boolean
|
14
|
-
validates name, inclusion: { in: [true, false] } if presence
|
15
|
-
else
|
16
|
-
attribute name
|
17
|
-
validates name, presence: true, allow_blank: true if presence
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
attribute :fetched_at
|
22
|
-
attribute :fetched_on
|
23
|
-
end
|