spidy 0.0.4 → 0.0.5

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8dd72483f6ea6a5d0d0711803df81ed67d43edc05297db6815d40f81848bbfa6
4
- data.tar.gz: 8f4576bcb16dd473802eade5a5d7732745b77810dad387de359106e5f30b2da4
3
+ metadata.gz: 41b1f40d3ae33eeac273d40a57b956be37971bbb940280c1ca5c9faf0371fbeb
4
+ data.tar.gz: e55d03b8703142255caf2eb74b927f4dcfdc58b02a96bb51fcb2f1fa0e9e4167
5
5
  SHA512:
6
- metadata.gz: e99a33f48a8e5ba51e1a6956cfd2bd5061b21ac6e46b79343049cf5c109fca30fa60252f5d9ca835f28bec58d3f5a1c361aaa0905cc5ded68c335602ca6c16d9
7
- data.tar.gz: bfc083af4cbbef9c44546ebb8810169c776308978f52468443a7c5cd159aca2975d498f94b5da8496573fb9e437b4587acdd02d293a277e4e7b7e03588563f06
6
+ metadata.gz: 9577e5315da4a33a4becb1a9c3d41788729fa5dcc8d1a8f1fe774a36a9c34503c7d4f35ee9c20efb99148ac8fe59d9ed69c47e8da20d4590a153b743cc1ea7d4
7
+ data.tar.gz: ff454254636e05f6df17d8b35d591ad32ffb6de66bd654a2ed34006eb73a87fa9e948539a0b954ae8bf82d30860966066f3947bc34559e783f617c90fa187715
data/exe/spidy CHANGED
@@ -2,22 +2,23 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  require 'spidy'
5
+ require 'pry'
5
6
 
6
- if ARGV[0]&.to_sym == :console
7
- if ARGV[1].blank?
8
- Spidy.console
7
+ if ARGV[1].blank?
8
+ case ARGV[0]
9
+ when 'version' then STDOUT.puts(Spidy::VERSION)
10
+ when 'console' then Spidy.console
9
11
  else
10
- Spidy.open(ARGV[1]).console
12
+ STDOUT.puts 'usage: spidy [version console]'
11
13
  end
12
- return
13
- end
14
-
15
- shell = Spidy.open(ARGV[1]).shell
16
-
17
- case ARGV[0]&.to_sym
18
- when :shell then shell.function
19
- when :call then shell.call(ARGV[2])
20
- when :each then shell.each(ARGV[2])
21
14
  else
22
- fail 'usage: spidy [call shell new console] [file]'
15
+ spidy = Spidy.open(ARGV[1])
16
+ case ARGV[0]
17
+ when 'console' then spidy.console
18
+ when 'shell' then spidy.shell.function
19
+ when 'call' then spidy.shell.call(ARGV[2])
20
+ when 'each' then spidy.shell.each(ARGV[2])
21
+ else
22
+ STDOUT.puts 'usage: spidy [call each shell] [file]'
23
+ end
23
24
  end
@@ -13,13 +13,28 @@ module Spidy::Connector::Html
13
13
  'Safari/537.36'
14
14
  ].join(' ')
15
15
 
16
- def self.call(url, encoding: nil, &yielder)
17
- agent = Mechanize.new
18
- if encoding
19
- agent.default_encoding = encoding
20
- agent.force_default_encoding = true
16
+ @agent = Mechanize.new
17
+ @agent.user_agent = USER_AGENT
18
+
19
+ class << self
20
+ def call(url, encoding: nil, &yielder)
21
+ if encoding
22
+ @agent.default_encoding = encoding
23
+ @agent.force_default_encoding = true
24
+ end
25
+ get(url, yielder)
26
+ end
27
+
28
+ def get(url, yielder)
29
+ @agent.get(url, &yielder)
30
+ rescue Mechanize::ResponseCodeError => e
31
+ case e.response_code
32
+ when '429'
33
+ sleep 2
34
+ @agent.get(url, &yielder)
35
+ else
36
+ raise e
37
+ end
21
38
  end
22
- agent.user_agent = USER_AGENT
23
- agent.get(url, &yielder)
24
39
  end
25
40
  end
@@ -8,23 +8,29 @@ class Spidy::Definition
8
8
  class_attribute :spiders, default: {}
9
9
 
10
10
  class << self
11
- def define(name, connector: nil, binder: nil, as: nil, &define_block)
11
+ def spider(name = :default, connector: nil, as: nil)
12
12
  connector = Spidy::Connector.get(as || connector) || connector
13
- binder = Spidy::Binder.get(as || binder) || binder
14
- namespace[name] = proc do |url, &yielder|
15
- connection_yielder = lambda do |resource|
16
- binder.call(resource, define_block) do |object|
17
- yielder.call(object)
18
- end
19
- end
20
- connector.call(url, &connection_yielder)
13
+ spiders[name] = proc do |url, &yielder|
14
+ yield(yielder, connector, url)
21
15
  end
22
16
  end
23
17
 
24
- def spider(name, connector: nil, as: nil)
18
+ def define(name = :default, connector: nil, binder: nil, as: nil, &define_block)
25
19
  connector = Spidy::Connector.get(as || connector) || connector
26
- spiders[name] = proc do |url, &yielder|
27
- yield(yielder, connector, url)
20
+ binder = Spidy::Binder.get(as || binder) || binder
21
+ namespace[name] = define_proc(connector, binder, define_block)
22
+ end
23
+
24
+ private
25
+
26
+ def define_proc(connector, binder, define_block)
27
+ proc do |url, &yielder|
28
+ fail 'invalid argument [Required url / block]' if url.blank? || yielder.nil?
29
+
30
+ connection_yielder = lambda do |resource|
31
+ binder.call(resource, define_block) { |object| yielder.call(object) }
32
+ end
33
+ connector.call(url, &connection_yielder)
28
34
  end
29
35
  end
30
36
  end
data/lib/spidy/shell.rb CHANGED
@@ -76,7 +76,7 @@ class Spidy::Shell
76
76
  STDIN.each do |line|
77
77
  command.call(line.strip, &output_yielder)
78
78
  rescue StandardError => e
79
- STDERR.puts("#{line.strip} => \n #{e.message}")
79
+ STDERR.puts("#{line.strip}\n #{e.message}")
80
80
  end
81
81
  end
82
82
  end
data/lib/spidy/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spidy
4
- VERSION = '0.0.4'
4
+ VERSION = '0.0.5'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - aileron
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-03 00:00:00.000000000 Z
11
+ date: 2019-09-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler