spidy 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -0
- data/README.md +3 -5
- data/exe/spidy +12 -6
- data/lib/spidy.rb +2 -15
- data/lib/spidy/binder.rb +7 -68
- data/lib/spidy/binder/html.rb +43 -0
- data/lib/spidy/binder/json.rb +41 -0
- data/lib/spidy/connector.rb +7 -1
- data/lib/spidy/connector/html.rb +7 -24
- data/lib/spidy/connector/json.rb +10 -0
- data/lib/spidy/console.rb +9 -1
- data/lib/spidy/definition.rb +13 -85
- data/lib/spidy/definition_file.rb +1 -1
- data/lib/spidy/shell.rb +31 -34
- data/lib/spidy/version.rb +1 -1
- data/spidy.png +0 -0
- metadata +6 -4
- data/lib/spidy/looper.rb +0 -22
- data/lib/spidy/result.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ee6041bae6fd932f5b19a52890766192fd59e7ab2be070390e3a8ac16b511cf
|
4
|
+
data.tar.gz: 49d00e22394f0d83d3b49aa8926710ee23c2bffae1c14ce469f3227b338b9cad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 82dd5bb591c47412c0648afe1a5ac7b971c8f1ccbf01708e3556d6edd2ca2a8d514ee3160cbf5cdc550c77d8e10b470eb6b70ff493f30d0b5458384cb18379a7
|
7
|
+
data.tar.gz: 798f5ef00e24d12d14058bc6ad87a54d3a9d67f9d091ee64f1450ceed1008bee027d4a396d2d8cf449bacb895220384b2ddbf141ccfbe7fb8b9e6a5290183d74
|
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# Spidy
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+

|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -32,7 +30,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
32
30
|
|
33
31
|
## Contributing
|
34
32
|
|
35
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
33
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/aileron-inc/spidy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
36
34
|
|
37
35
|
## License
|
38
36
|
|
@@ -40,4 +38,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
40
38
|
|
41
39
|
## Code of Conduct
|
42
40
|
|
43
|
-
Everyone interacting in the Crawler project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
41
|
+
Everyone interacting in the Crawler project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/aileron-inc/spidy/blob/master/CODE_OF_CONDUCT.md).
|
data/exe/spidy
CHANGED
@@ -3,15 +3,21 @@
|
|
3
3
|
|
4
4
|
require 'spidy'
|
5
5
|
|
6
|
-
|
7
|
-
when :spider then Spidy.open(ARGV[1]).shell.spider(ARGV[2])
|
8
|
-
when :scraper then Spidy.open(ARGV[1]).shell.scraper(ARGV[2])
|
9
|
-
when :shell then Spidy.open(ARGV[1]).shell.function
|
10
|
-
when :new then Spidy.open(ARGV[1]).shell.build
|
11
|
-
when :console
|
6
|
+
if ARGV[0]&.to_sym == :console
|
12
7
|
if ARGV[1].blank?
|
13
8
|
Spidy.console
|
14
9
|
else
|
15
10
|
Spidy.open(ARGV[1]).console
|
16
11
|
end
|
12
|
+
return
|
13
|
+
end
|
14
|
+
|
15
|
+
shell = Spidy.open(ARGV[1]).shell
|
16
|
+
|
17
|
+
case ARGV[0]&.to_sym
|
18
|
+
when :shell then shell.function
|
19
|
+
when :call then shell.call(ARGV[2])
|
20
|
+
when :each then shell.each(ARGV[2])
|
21
|
+
else
|
22
|
+
fail 'usage: spidy [call shell new console] [file]'
|
17
23
|
end
|
data/lib/spidy.rb
CHANGED
@@ -17,12 +17,7 @@ module Spidy
|
|
17
17
|
autoload :Definition
|
18
18
|
autoload :DefinitionFile
|
19
19
|
autoload :Binder
|
20
|
-
autoload :Spider
|
21
|
-
autoload :Looper
|
22
20
|
autoload :Connector
|
23
|
-
autoload :Result
|
24
|
-
|
25
|
-
const_set(:Crawler, Module.new) unless const_defined?(:Crawler)
|
26
21
|
|
27
22
|
def self.console
|
28
23
|
require 'pry'
|
@@ -33,15 +28,7 @@ module Spidy
|
|
33
28
|
::Spidy::DefinitionFile.open(filepath)
|
34
29
|
end
|
35
30
|
|
36
|
-
def self.define(
|
37
|
-
|
38
|
-
crawler_definition.domain = domain
|
39
|
-
|
40
|
-
if name
|
41
|
-
crawler_class_name = name.to_s.camelize
|
42
|
-
Crawler.class_eval { remove_const(crawler_class_name) } if Crawler.const_defined?(crawler_class_name)
|
43
|
-
Crawler.const_set(crawler_class_name, crawler_definition)
|
44
|
-
end
|
45
|
-
crawler_definition
|
31
|
+
def self.define(&block)
|
32
|
+
Class.new(::Spidy::Definition, &block)
|
46
33
|
end
|
47
34
|
end
|
data/lib/spidy/binder.rb
CHANGED
@@ -3,75 +3,14 @@
|
|
3
3
|
#
|
4
4
|
# Bind resource received from the connection to the result object
|
5
5
|
#
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
class Multiple
|
11
|
-
def self.bind(connector:, binder:, query:, block:)
|
12
|
-
multiple_binding_class = self
|
13
|
-
connector.field.call(binder, query) do |elements|
|
14
|
-
multiple_binding_class.new(binder.class).instance_exec(elements, &block)
|
15
|
-
end
|
16
|
-
end
|
6
|
+
module Spidy::Binder
|
7
|
+
extend ActiveSupport::Autoload
|
8
|
+
autoload :Json
|
9
|
+
autoload :Html
|
17
10
|
|
18
|
-
|
19
|
-
|
20
|
-
end
|
11
|
+
def self.get(name)
|
12
|
+
return unless name.is_a?(String) || name.is_a?(Symbol)
|
21
13
|
|
22
|
-
|
23
|
-
@binder.field_names << name
|
24
|
-
@binder.field_names.uniq!
|
25
|
-
@binder.result_class.define(name)
|
26
|
-
result = yield
|
27
|
-
@binder.define_method(name) { result }
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
class_attribute :field_names, default: []
|
32
|
-
attr_reader :resource
|
33
|
-
|
34
|
-
def initialize(resource)
|
35
|
-
@resource = resource
|
36
|
-
self.class.fields_call(self)
|
37
|
-
end
|
38
|
-
|
39
|
-
def result
|
40
|
-
definition = self
|
41
|
-
fetched_at = Time.current
|
42
|
-
result = self.class.result_class.new(fetched_at: fetched_at, fetched_on: fetched_at.beginning_of_day, **attributes)
|
43
|
-
result.define_singleton_method(:resource) { definition.resource }
|
44
|
-
result
|
45
|
-
end
|
46
|
-
|
47
|
-
def attributes_to_array
|
48
|
-
field_names.map { |field_name| send(field_name) }
|
49
|
-
end
|
50
|
-
|
51
|
-
def attributes
|
52
|
-
field_names.map { |field_name| [field_name, send(field_name)] }.to_h
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.query(name, query = nil, &block)
|
56
|
-
define_method(name) do
|
57
|
-
connector.field.call(self, query, &block)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
def self.field(name, query = nil, optional: false, &block)
|
62
|
-
field_names << name
|
63
|
-
field_names.uniq!
|
64
|
-
result_class.define(name, presence: !optional)
|
65
|
-
define_method(name) do
|
66
|
-
connector.field.call(self, query, &block)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.fields(query, &block)
|
71
|
-
@fields = { query: query, block: block }
|
72
|
-
end
|
73
|
-
|
74
|
-
def self.fields_call(binder)
|
75
|
-
Multiple.bind(connector: connector, binder: binder, query: @fields[:query], block: @fields[:block]) if @fields
|
14
|
+
const_get(name.to_s.classify)
|
76
15
|
end
|
77
16
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Bind html and convert to object
|
5
|
+
#
|
6
|
+
module Spidy::Binder::Html
|
7
|
+
#
|
8
|
+
# Describe the definition to get the necessary elements from the resource object
|
9
|
+
#
|
10
|
+
class Resource
|
11
|
+
class_attribute :names, default: []
|
12
|
+
attr_reader :html
|
13
|
+
|
14
|
+
def initialize(html)
|
15
|
+
@html = html
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_s
|
19
|
+
to_h.to_json
|
20
|
+
end
|
21
|
+
|
22
|
+
def to_h
|
23
|
+
names.map { |name| [name, send(name)] }.to_h
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.let(name, query = nil, &block)
|
27
|
+
names << name
|
28
|
+
define_method(name) do
|
29
|
+
return html.at(query)&.text if block.nil?
|
30
|
+
return instance_exec(&block) if query.blank?
|
31
|
+
|
32
|
+
instance_exec(html.search(query), &block)
|
33
|
+
rescue NoMethodError => e
|
34
|
+
raise "#{html.uri} ##{name} => #{e.message}"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.call(html, define_block)
|
40
|
+
binder = Class.new(Resource) { instance_exec(&define_block) }
|
41
|
+
yield binder.new(html)
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Bind json and convert to object
|
5
|
+
#
|
6
|
+
module Spidy::Binder::Json
|
7
|
+
#
|
8
|
+
# Describe the definition to get the necessary elements from the resource object
|
9
|
+
#
|
10
|
+
class Resource
|
11
|
+
class_attribute :names, default: []
|
12
|
+
attr_reader :resource
|
13
|
+
|
14
|
+
def initialize(resource)
|
15
|
+
@resource = resource
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_s
|
19
|
+
to_h.to_json
|
20
|
+
end
|
21
|
+
|
22
|
+
def to_h
|
23
|
+
names.map { |name| [name, send(name)] }.to_h
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.let(name, *query, &block)
|
27
|
+
names << name
|
28
|
+
define_method(name) do
|
29
|
+
result = resource.dig(*query) if query.present?
|
30
|
+
return result if block.nil?
|
31
|
+
|
32
|
+
instance_exec(result, &block)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.call(resource, define_block)
|
38
|
+
binder = Class.new(Resource) { instance_exec(&define_block) }
|
39
|
+
yield binder.new(resource)
|
40
|
+
end
|
41
|
+
end
|
data/lib/spidy/connector.rb
CHANGED
data/lib/spidy/connector/html.rb
CHANGED
@@ -3,15 +3,7 @@
|
|
3
3
|
#
|
4
4
|
# Mechanize wrapper
|
5
5
|
#
|
6
|
-
|
7
|
-
class_attribute :field, default: (lambda { |object, query, &block|
|
8
|
-
node = object.resource.search(query)
|
9
|
-
fail "Could not be located #{query}" if node.nil?
|
10
|
-
return node.first.text if block.nil?
|
11
|
-
|
12
|
-
object.instance_exec(node, &block)
|
13
|
-
})
|
14
|
-
|
6
|
+
module Spidy::Connector::Html
|
15
7
|
USER_AGENT = [
|
16
8
|
'Mozilla/5.0',
|
17
9
|
'(Macintosh; Intel Mac OS X 10_12_6)',
|
@@ -21,22 +13,13 @@ class Spidy::Connector::Html
|
|
21
13
|
'Safari/537.36'
|
22
14
|
].join(' ')
|
23
15
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
def initialize(start_url: nil, encoding: nil)
|
28
|
-
@start_url = start_url
|
29
|
-
@agent = Mechanize.new
|
16
|
+
def self.call(url, encoding: nil, &yielder)
|
17
|
+
agent = Mechanize.new
|
30
18
|
if encoding
|
31
|
-
|
32
|
-
|
19
|
+
agent.default_encoding = encoding
|
20
|
+
agent.force_default_encoding = true
|
33
21
|
end
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
def call(url = @start_url, &block)
|
38
|
-
fail 'URL is undefined' if url.blank?
|
39
|
-
|
40
|
-
agent.get(url, &block)
|
22
|
+
agent.user_agent = USER_AGENT
|
23
|
+
agent.get(url, &yielder)
|
41
24
|
end
|
42
25
|
end
|
data/lib/spidy/console.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
#
|
6
6
|
class Spidy::Console
|
7
7
|
attr_reader :definition_file
|
8
|
-
delegate :
|
8
|
+
delegate :namespace, :spiders, to: :definition_file
|
9
9
|
|
10
10
|
def initialize(definition_file = nil)
|
11
11
|
@definition_file = definition_file
|
@@ -18,4 +18,12 @@ class Spidy::Console
|
|
18
18
|
def reload!
|
19
19
|
@definition_file&.eval_definition
|
20
20
|
end
|
21
|
+
|
22
|
+
def call(name, url = nil, &block)
|
23
|
+
namespace[name].call(url, &block)
|
24
|
+
end
|
25
|
+
|
26
|
+
def each(name, url = nil, &block)
|
27
|
+
spiders[name].call(url, &block)
|
28
|
+
end
|
21
29
|
end
|
data/lib/spidy/definition.rb
CHANGED
@@ -4,100 +4,28 @@
|
|
4
4
|
# Class representing a website defined by DSL
|
5
5
|
#
|
6
6
|
class Spidy::Definition
|
7
|
-
class_attribute :
|
7
|
+
class_attribute :namespace, default: {}
|
8
8
|
class_attribute :spiders, default: {}
|
9
|
-
class_attribute :scrapers, default: {}
|
10
|
-
class_attribute :output, default: ->(result) { STDOUT.puts(result.attributes.to_json) }
|
11
9
|
|
12
|
-
def output(&block)
|
13
|
-
self.output = block
|
14
|
-
end
|
15
|
-
|
16
|
-
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
17
10
|
class << self
|
18
|
-
def
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
connector.call(url) do |resource|
|
26
|
-
spider.call(resource, &spider_block)
|
11
|
+
def define(name, connector: nil, binder: nil, as: nil, &define_block)
|
12
|
+
connector = Spidy::Connector.get(as || connector) || connector
|
13
|
+
binder = Spidy::Binder.get(as || binder) || binder
|
14
|
+
namespace[name] = proc do |url, &yielder|
|
15
|
+
connection_yielder = lambda do |resource|
|
16
|
+
binder.call(resource, define_block) do |object|
|
17
|
+
yielder.call(object)
|
27
18
|
end
|
28
19
|
end
|
29
|
-
|
30
|
-
const_set("#{name}_spider".classify, spider_class)
|
31
|
-
spiders[name] = spider_class
|
32
|
-
end
|
33
|
-
|
34
|
-
def scraper(name, options, &block)
|
35
|
-
if options[:loop]
|
36
|
-
loop_scraper(name, options, &block)
|
37
|
-
else
|
38
|
-
normal_scraper(name, **options, &block)
|
20
|
+
connector.call(url, &connection_yielder)
|
39
21
|
end
|
40
22
|
end
|
41
23
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
result_class = Class.new(Spidy::Result)
|
47
|
-
|
48
|
-
# connector
|
49
|
-
connector_class = Spidy::Connector.const_get(options[:as].to_s.classify)
|
50
|
-
connector = connector_class.new(encoding: options[:encoding])
|
51
|
-
|
52
|
-
namespace = Class.new do
|
53
|
-
binder = Class.new(Spidy::Binder) do
|
54
|
-
define_singleton_method(:connector) { connector }
|
55
|
-
define_singleton_method(:result_class) { result_class }
|
56
|
-
define_method(:connector) { connector }
|
57
|
-
instance_exec(&block)
|
58
|
-
end
|
59
|
-
define_singleton_method(:call) do |url = options[:start_url], &yielder|
|
60
|
-
connector.call(url) do |resource|
|
61
|
-
looper = Spidy::Looper.new(resource, binder, options[:loop])
|
62
|
-
looper.call(&yielder)
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
66
|
-
const_set("#{name}_scraper".classify, namespace)
|
67
|
-
scrapers[name] = namespace
|
68
|
-
end
|
69
|
-
|
70
|
-
def normal_scraper(name, encoding: nil, as: :html, &block)
|
71
|
-
# result
|
72
|
-
result_class = Class.new(Spidy::Result)
|
73
|
-
|
74
|
-
# connector
|
75
|
-
connector_class = Spidy::Connector.const_get(as.to_s.classify)
|
76
|
-
connector = connector_class.new(encoding: encoding)
|
77
|
-
|
78
|
-
# namespace
|
79
|
-
namespace = Class.new do
|
80
|
-
binder = Class.new(Spidy::Binder) do
|
81
|
-
define_singleton_method(:connector) { connector }
|
82
|
-
define_singleton_method(:result_class) { result_class }
|
83
|
-
define_method(:connector) { connector }
|
84
|
-
instance_exec(&block)
|
85
|
-
end
|
86
|
-
define_singleton_method(:bind) do |url|
|
87
|
-
connector.call(url) do |resource|
|
88
|
-
binder.new(resource)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
define_singleton_method(:call) do |url, &output|
|
92
|
-
result = bind(url).result
|
93
|
-
fail "#{url}\n#{result.errors.full_messages}" if result.invalid?
|
94
|
-
|
95
|
-
output.call(result)
|
96
|
-
end
|
24
|
+
def spider(name, connector: nil, as: nil)
|
25
|
+
connector = Spidy::Connector.get(as || connector) || connector
|
26
|
+
spiders[name] = proc do |url, &yielder|
|
27
|
+
yield(yielder, connector, url)
|
97
28
|
end
|
98
|
-
const_set("#{name}_scraper".classify, namespace)
|
99
|
-
scrapers[name] = namespace
|
100
29
|
end
|
101
30
|
end
|
102
|
-
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
103
31
|
end
|
data/lib/spidy/shell.rb
CHANGED
@@ -1,51 +1,25 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'pry'
|
4
|
+
|
3
5
|
#
|
4
6
|
# spidy shell interface
|
5
7
|
#
|
6
8
|
class Spidy::Shell
|
7
9
|
attr_reader :definition_file
|
8
|
-
delegate :
|
10
|
+
delegate :namespace, :spiders, to: :definition_file
|
9
11
|
|
10
12
|
def initialize(definition_file)
|
11
13
|
@definition_file = definition_file
|
12
14
|
end
|
13
15
|
|
14
|
-
# rubocop:disable Lint/AssignmentInCondition, Style/RescueStandardError
|
15
|
-
def scraper(name)
|
16
|
-
command = scrapers[name.to_sym]
|
17
|
-
fail "undefined commmand[#{name}]" if command.nil?
|
18
|
-
|
19
|
-
while line = STDIN.gets
|
20
|
-
url = line.strip
|
21
|
-
begin
|
22
|
-
command.call(url, &definition_file.output)
|
23
|
-
rescue => e
|
24
|
-
STDERR.puts "ERROR #{url}: #{e}\n#{e.backtrace}"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
# rubocop:enable Lint/AssignmentInCondition, Style/RescueStandardError
|
29
|
-
|
30
|
-
def spider(name)
|
31
|
-
command = spiders[name.to_sym]
|
32
|
-
if File.pipe?(STDIN)
|
33
|
-
STDIN.each_line do |line|
|
34
|
-
start_url = line.strip
|
35
|
-
command.call(start_url) { |url| puts url }
|
36
|
-
end
|
37
|
-
else
|
38
|
-
command.call { |url| puts url }
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
16
|
def function
|
43
17
|
print <<~SHELL
|
44
18
|
function spider() {
|
45
19
|
spidy spider #{definition_file.path} $1
|
46
20
|
}
|
47
21
|
function scraper() {
|
48
|
-
spidy
|
22
|
+
spidy call #{definition_file.path} $1
|
49
23
|
}
|
50
24
|
SHELL
|
51
25
|
end
|
@@ -56,12 +30,14 @@ class Spidy::Shell
|
|
56
30
|
f.write <<~RUBY
|
57
31
|
# frozen_string_literal: true
|
58
32
|
|
59
|
-
Spidy.define
|
60
|
-
spider(:example
|
61
|
-
#
|
33
|
+
Spidy.define do
|
34
|
+
spider(:example) do |yielder, connector|
|
35
|
+
# connector.call(url) do |resource|
|
36
|
+
# yielder.call(url or resource)
|
37
|
+
# end
|
62
38
|
end
|
63
39
|
|
64
|
-
|
40
|
+
define(:example) do
|
65
41
|
end
|
66
42
|
end
|
67
43
|
RUBY
|
@@ -76,4 +52,25 @@ class Spidy::Shell
|
|
76
52
|
end
|
77
53
|
end
|
78
54
|
# rubocop:enable Metrics/MethodLength
|
55
|
+
|
56
|
+
def call(name)
|
57
|
+
exec(namespace[name&.to_sym] || namespace.values.first)
|
58
|
+
end
|
59
|
+
|
60
|
+
def each(name)
|
61
|
+
exec(spiders[name&.to_sym] || spiders.values.first)
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
def exec(command)
|
67
|
+
fail "undefined commmand[#{name}]" if command.nil?
|
68
|
+
|
69
|
+
yielder = proc { |result| STDOUT.puts(result.to_s) }
|
70
|
+
if FileTest.pipe?(STDIN)
|
71
|
+
STDIN.each { |line| command.call(line.strip, &yielder) }
|
72
|
+
else
|
73
|
+
command.call(&yielder)
|
74
|
+
end
|
75
|
+
end
|
79
76
|
end
|
data/lib/spidy/version.rb
CHANGED
data/spidy.png
ADDED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -148,18 +148,20 @@ files:
|
|
148
148
|
- exe/spidy
|
149
149
|
- lib/spidy.rb
|
150
150
|
- lib/spidy/binder.rb
|
151
|
+
- lib/spidy/binder/html.rb
|
152
|
+
- lib/spidy/binder/json.rb
|
151
153
|
- lib/spidy/connector.rb
|
152
154
|
- lib/spidy/connector/html.rb
|
155
|
+
- lib/spidy/connector/json.rb
|
153
156
|
- lib/spidy/connector/xml.rb
|
154
157
|
- lib/spidy/console.rb
|
155
158
|
- lib/spidy/definition.rb
|
156
159
|
- lib/spidy/definition_file.rb
|
157
|
-
- lib/spidy/looper.rb
|
158
|
-
- lib/spidy/result.rb
|
159
160
|
- lib/spidy/shell.rb
|
160
161
|
- lib/spidy/spider.rb
|
161
162
|
- lib/spidy/version.rb
|
162
163
|
- spidy.gemspec
|
164
|
+
- spidy.png
|
163
165
|
homepage: https://github.com/aileron-inc/spidy
|
164
166
|
licenses:
|
165
167
|
- MIT
|
data/lib/spidy/looper.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
#
|
4
|
-
# looper
|
5
|
-
#
|
6
|
-
class Spidy::Looper
|
7
|
-
def initialize(resource, binder, loop_block)
|
8
|
-
@resource = resource
|
9
|
-
@binder = binder
|
10
|
-
@loop_block = loop_block
|
11
|
-
end
|
12
|
-
|
13
|
-
def call
|
14
|
-
yielder = lambda do |element|
|
15
|
-
result = @binder.new(element).result
|
16
|
-
fail "#{element}\n\n#{result.errors.full_messages}" if result.invalid?
|
17
|
-
|
18
|
-
yield result
|
19
|
-
end
|
20
|
-
@loop_block.call(@resource, yielder)
|
21
|
-
end
|
22
|
-
end
|
data/lib/spidy/result.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
#
|
4
|
-
# Scrape results
|
5
|
-
#
|
6
|
-
class Spidy::Result
|
7
|
-
include ActiveModel::Model
|
8
|
-
include ActiveModel::Attributes
|
9
|
-
|
10
|
-
def self.define(name, presence: true)
|
11
|
-
case name
|
12
|
-
when /.*\?/
|
13
|
-
attribute name, :boolean
|
14
|
-
validates name, inclusion: { in: [true, false] } if presence
|
15
|
-
else
|
16
|
-
attribute name
|
17
|
-
validates name, presence: true, allow_blank: true if presence
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
attribute :fetched_at
|
22
|
-
attribute :fetched_on
|
23
|
-
end
|