arb_spider 0.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -0
- data/lib/arb_spider.rb +11 -2
- data/lib/arb_spider/scale_hash.rb +37 -0
- data/lib/arb_spider/spider.rb +63 -0
- data/lib/arb_spider/version.rb +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5de63f3a7f1ae54a77faa7ac8a19bd9cbfa3bf4e
|
4
|
+
data.tar.gz: 6b8ac5b5b5ceeebeaf0feaf3f824ddb063f40b88
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 11d14959c81b2a0e0c51755fed28f64b3851ee35b71763b8d72e697269bb04fe635ddcfa102bc976416589939fa00ad3ccb4d7b9a66dba0252e5a25b0021d6a1
|
7
|
+
data.tar.gz: 8ab34e4442577b03b16e536bdaac322c0908efd5fd7b67d63bae6bf88a61aa7d0310a5968d399dd4752454f1b7952437845b816401c7c649f988da67e2aab585
|
data/Gemfile
CHANGED
data/lib/arb_spider.rb
CHANGED
@@ -1,5 +1,14 @@
|
|
1
|
-
require
|
1
|
+
require 'arb_spider/version'
|
2
|
+
require 'arb_spider/spider'
|
2
3
|
|
3
4
|
module ArbSpider
  class << self
    # Spider instance shared by ArbSpider.parse. It is created lazily on the
    # first call to ArbSpider.parse unless one has been assigned beforehand.
    attr_accessor :default_spider

    # Delegates to #parse on the shared default spider, creating that spider
    # on first use. All arguments and the block are forwarded unchanged.
    #
    # The call is made directly rather than through `send`: Spider derives
    # from BasicObject, which does not define `send`, so `send` would be
    # swallowed by the spider's own method_missing.
    def parse(*args, &block)
      self.default_spider ||= ArbSpider::Spider.new
      default_spider.parse(*args, &block)
    end
  end

  # Instance-level convenience for classes that include ArbSpider; it simply
  # forwards to the module-level ArbSpider.parse.
  def parse(*args, &block)
    ::ArbSpider.parse(*args, &block)
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module ArbSpider
  # Hash-like container that keeps at most `scale` entries inserted through
  # `[]=`. Keys are recorded in a ring of `scale` slots (help_hash); when a
  # slot is reused, the key that previously occupied it is removed from the
  # underlying hash (core_hash). Every other message is forwarded to
  # core_hash through method_missing.
  #
  # The class derives from BasicObject, so top-level constants must be
  # written with a leading `::` and Kernel methods are not available
  # implicitly.
  class ScaleHash < BasicObject
    # slot index => key currently occupying that slot
    attr_accessor :help_hash
    # the real Hash holding key => value
    attr_accessor :core_hash
    # number of slot-consuming insertions so far, minus one
    attr_accessor :counter
    # number of slots in the ring
    attr_accessor :scale

    # scale  - number of ring slots (default 10).
    # args / block - passed straight to ::Hash.new for the underlying hash.
    def initialize(scale = 10, *args, &block)
      self.counter = -1
      self.scale = scale
      self.help_hash = {}
      self.core_hash = ::Hash.new(*args, &block)
    end

    # Forwards every unknown message to core_hash unless hijack_method
    # handled it. A NoMethodError raised while forwarding is re-raised with a
    # message naming this object.
    def method_missing(method, *args, &block)
      # For a hijacked `[]=` the assigned value is returned.
      return args[1] if hijack_method(method, *args, &block)

      core_hash.send(method, *args, &block)
    rescue ::NoMethodError => e
      ::Kernel.raise ::NoMethodError, "undefined method '#{e.name}' for #{self}"
    end

    # Intercepts `[]=` to maintain the slot ring. Returns true when the call
    # was handled here (the caller must then not forward it), false
    # otherwise.
    #
    # The return value is an explicit boolean so that storing nil or false
    # is still reported as handled. A key that is already present is updated
    # in place without consuming a new slot, so one key never occupies
    # several slots.
    def hijack_method(method, *args, &block)
      return false unless method == :[]=

      key, value = args
      unless core_hash.key?(key)
        self.counter += 1
        index = counter % scale
        # Evict whatever key previously lived in the slot being reused.
        core_hash.delete(help_hash[index]) if help_hash.key?(index)
        help_hash[index] = key
      end
      core_hash[key] = value
      true
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'httpclient'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'arb_spider/scale_hash'
|
4
|
+
|
5
|
+
module ArbSpider
  # Thin wrapper around an HTTPClient that remembers the last HTTP::Message
  # it received (current_doc) and caches fetched documents per URL in a
  # ScaleHash of 20 slots.
  #
  # The class derives from BasicObject: top-level constants must be written
  # with a leading `::` and Kernel methods (raise, block_given?) must be
  # called through ::Kernel explicitly.
  class Spider < BasicObject
    # ScaleHash mapping url => fetched document
    attr_reader :cached_docs
    # last HTTP::Message returned by a call forwarded to the client
    attr_reader :current_doc
    # the underlying HTTPClient instance
    attr_reader :client

    # Builds the HTTP client and the document cache, then yields the new
    # spider to the block (if any) so callers can configure it.
    def initialize
      @client = ::HTTPClient.new
      @cached_docs = ScaleHash.new 20

      # after initializing
      yield self if ::Kernel.block_given?
    end

    # Loads the document for +url+: from the cache when present, otherwise
    # by fetching it (extra +args+ are passed to the fetch). With no url the
    # current document is used.
    #
    # Returns the loaded document.
    # Raises StandardError when no document could be obtained.
    #
    # NOTE(review): the block is accepted but not used yet; what it should
    # receive is not visible in this file - confirm with the author.
    def parse(url = nil, *args, &block)
      doc = url.nil? ? current_doc : (@cached_docs[url] || fetch_doc(url, *args))
      ::Kernel.raise ::StandardError, "fail to load doc of specific url #{url}" if doc.nil?
      doc
    end

    # Forwards unknown messages to the HTTP client (this supports e.g. user
    # authorization through client methods such as post). If the client
    # raises NoMethodError, the message is retried on the current document.
    # If that also raises NoMethodError, a NoMethodError naming this spider
    # is raised.
    #
    # Returns whatever the receiver returned. A returned HTTP::Message is
    # additionally remembered as current_doc.
    def method_missing(method, *args, &block)
      begin
        result = @client.send(method, *args, &block)
      rescue ::NoMethodError
        # The fallback needs its own begin/rescue: a sibling rescue clause
        # never sees exceptions raised from inside another rescue clause.
        begin
          return @current_doc.send(method, *args, &block)
        rescue ::NoMethodError => e
          # `inspect` is used rather than interpolating self, because
          # BasicObject has no to_s and the call would re-enter
          # method_missing.
          ::Kernel.raise ::NoMethodError, "undefined method '#{e.name}' for #{inspect}"
        end
      end
      @current_doc = result if ::HTTP::Message === result
      result
    end

    # Short identification string built from the class name and object id.
    def inspect
      klass = class << self
        self
      end
      '<%s:%d>' % [klass.superclass.name, self.__id__]
    end

    private

    # Fetches +url+ through the client and stores the result in the cache.
    #
    # The HTTP method defaults to :get and can be overridden with a :method
    # entry in a Hash among +args+. That entry is removed before the
    # remaining arguments are handed to the client, and a Hash left empty by
    # the removal is dropped altogether. Hashes are copied first so the
    # caller's objects are not mutated.
    #
    # Returns the fetched document.
    def fetch_doc(url, *args)
      request_args = args.map { |arg| ::Hash === arg ? arg.dup : arg }
      http_method = :get # default to use get method

      options = request_args.find { |arg| ::Hash === arg }
      if options && options.key?(:method)
        http_method = parse_args(:method, request_args) || :get
        request_args.delete_if { |arg| arg.equal?(options) } if options.empty?
      end

      cached_docs[url] = client.send(http_method, url, *request_args)
    end

    # Removes +key+ from the first Hash found in +source+ and returns the
    # removed value (nil when the key is absent). When +source+ is omitted,
    # the values of the local variables visible to +block+ are searched
    # instead.
    #
    # Raises ArgumentError when neither source nor block is given, or when
    # no Hash is found.
    def parse_args(key, source = nil, &block)
      unless source || block
        ::Kernel.raise ::ArgumentError, 'please provide either arguments source or a block with which the local variables bind! (eg parse_args{}) '
      end

      candidates = source || block.binding.local_variables.map do |name|
        block.binding.local_variable_get(name)
      end
      args_hash = candidates.find { |item| ::Hash === item }
      ::Kernel.raise ::ArgumentError, 'no arguments hash found in current hash, please provide the proper source ' if args_hash.nil?

      args_hash.delete(key)
    end
  end
end
|
data/lib/arb_spider/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arb_spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- arybin
|
@@ -55,6 +55,8 @@ files:
|
|
55
55
|
- bin/console
|
56
56
|
- bin/setup
|
57
57
|
- lib/arb_spider.rb
|
58
|
+
- lib/arb_spider/scale_hash.rb
|
59
|
+
- lib/arb_spider/spider.rb
|
58
60
|
- lib/arb_spider/version.rb
|
59
61
|
homepage: https://github.com/arybin-cn/arb_spider
|
60
62
|
licenses: []
|