arb_spider 0.1.0 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b7b99674de972409ead3555f8eb50c34e060dbbb
4
- data.tar.gz: 9fa3fea8aae4964f9bb0026a0e1ba60f14c38ba4
3
+ metadata.gz: 5de63f3a7f1ae54a77faa7ac8a19bd9cbfa3bf4e
4
+ data.tar.gz: 6b8ac5b5b5ceeebeaf0feaf3f824ddb063f40b88
5
5
  SHA512:
6
- metadata.gz: 82d4182084187698eacd6b2c1ecdcddd899b8e1e1e2df2253b9d47534618b4314b3eb009dcd9ef7c0977f1d4356b83e48c9c806e91145db2443f9c2becb3c561
7
- data.tar.gz: e52fbe5ee98deebd8f2b4b38d457c217243bc51b0bd6c29fc021c43fdb18f7ce9c69cb3ed87ac507f19bc8f6b18989e96b27c1c74cc739f357ba595ef8ab6bfc
6
+ metadata.gz: 11d14959c81b2a0e0c51755fed28f64b3851ee35b71763b8d72e697269bb04fe635ddcfa102bc976416589939fa00ad3ccb4d7b9a66dba0252e5a25b0021d6a1
7
+ data.tar.gz: 8ab34e4442577b03b16e536bdaac322c0908efd5fd7b67d63bae6bf88a61aa7d0310a5968d399dd4752454f1b7952437845b816401c7c649f988da67e2aab585
data/Gemfile CHANGED
@@ -1,4 +1,8 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
+ gem 'httpclient','~> 2.7.1'
4
+ gem 'nokogiri','~> 1.6.7'
5
+
6
+
3
7
  # Specify your gem's dependencies in arb_spider.gemspec
4
8
  gemspec
data/lib/arb_spider.rb CHANGED
@@ -1,5 +1,14 @@
1
- require "arb_spider/version"
1
+ require 'arb_spider/version'
2
+ require 'arb_spider/spider'
2
3
 
3
4
  module ArbSpider
4
- # Your code goes here...
5
+
6
+ class << self
7
+ attr_accessor :default_spider
8
+ end
9
+
10
+ def parse(*args, &block)
11
+ self.class.default_spider ||= ArbSpider::Spider.new
12
+ self.class.default_spider.send :parse *args, &block
13
+ end
5
14
  end
@@ -0,0 +1,37 @@
1
module ArbSpider
  # A Hash wrapper that keeps at most +scale+ entries.
  #
  # Writes made through +[]=+ / +store+ are intercepted: every new key claims
  # the next slot of a ring of +scale+ slots, and the key that previously
  # occupied that slot is removed from the underlying hash. Every other
  # message is forwarded to the underlying hash unchanged.
  #
  # The class derives from BasicObject so that almost every message reaches
  # +method_missing+; as a consequence top-level constants must be written
  # with a leading +::+ and Kernel methods must be called on +::Kernel+.
  class ScaleHash < BasicObject
    # slot index => key currently occupying that slot
    attr_accessor :help_hash
    # the real Hash holding key => value
    attr_accessor :core_hash
    # number of slot claims so far, minus one (-1 before the first claim)
    attr_accessor :counter
    # number of slots in the ring
    attr_accessor :scale

    # @param scale [Integer] maximum number of tracked entries
    # @param args  forwarded to ::Hash.new (e.g. a default value)
    # @param block forwarded to ::Hash.new (default-value block)
    def initialize(scale = 10, *args, &block)
      self.counter = -1
      self.scale = scale
      self.help_hash = ::Hash.new
      self.core_hash = ::Hash.new(*args, &block)
    end

    # Forwards every message that is not intercepted to the underlying hash.
    #
    # The receiver is checked up front instead of rescuing NoMethodError, so
    # a NoMethodError raised *inside* a delegated call (for instance in a
    # block supplied by the caller) propagates unchanged.
    #
    # @raise [::NoMethodError] when the underlying hash cannot handle +method+
    def method_missing(method, *args, &block)
      # An intercepted write evaluates to the stored value, like Hash#store.
      return args[1] if hijack_method(method, *args, &block)

      # `true` also accepts private methods, matching the plain `send` below.
      unless core_hash.respond_to?(method, true)
        ::Kernel.raise ::NoMethodError.new("undefined method '#{method}' for #{core_hash}", method)
      end
      core_hash.send(method, *args, &block)
    end

    # Intercepts bounded writes.
    #
    # @return [Boolean] true when the call was fully handled here and must not
    #   be forwarded to the underlying hash, false otherwise
    def hijack_method(method, *args, &block)
      case method
      when :[]=, :store
        # Wrong arity: let the real Hash raise its usual ArgumentError.
        return false unless args.size == 2

        key, value = args
        claim_slot(key)
        core_hash[key] = value
        # Always true, so a nil/false value is not written a second time.
        true
      else
        false
      end
    end

    private

    # Reserves a ring slot for +key+, evicting the previous occupant of that
    # slot. A key that is already stored and tracked keeps its slot, so
    # overwriting a value neither consumes capacity nor leaves a stale slot
    # that would later evict the live entry.
    def claim_slot(key)
      tracked = help_hash.any? { |_slot, owner| owner.eql?(key) }
      return if tracked && core_hash.key?(key)

      # The key was removed from core_hash behind our back (e.g. via delete):
      # forget its old slot before handing out a new one.
      help_hash.delete_if { |_slot, owner| owner.eql?(key) } if tracked

      self.counter += 1
      index = counter % scale
      core_hash.delete(help_hash[index]) if help_hash.key?(index)
      help_hash[index] = key
    end
  end
end
@@ -0,0 +1,63 @@
1
+ require 'httpclient'
2
+ require 'nokogiri'
3
+ require 'arb_spider/scale_hash'
4
+
5
module ArbSpider
  # Fetches documents through an HTTPClient instance and keeps the most
  # recently fetched ones in a bounded cache.
  #
  # The class derives from BasicObject so that unknown messages reach
  # +method_missing+ and can be forwarded to the HTTP client or to the
  # current document. Because of that, top-level constants need a leading
  # +::+ and Kernel methods must be called on +::Kernel+ inside this class.
  class Spider < BasicObject
    # url => fetched document (a ScaleHash created with a scale of 20)
    attr_reader :cached_docs
    # the document most recently loaded or returned by the client
    attr_reader :current_doc
    # the underlying ::HTTPClient instance
    attr_reader :client

    # Builds the client and the cache, then yields the new spider when a
    # block is given so callers can configure it (e.g. log in).
    def initialize
      @client = ::HTTPClient.new
      @cached_docs = ScaleHash.new 20

      # `defined?(yield)` is a keyword check and therefore works without Kernel.
      yield self if defined?(yield)
    end

    # Loads the document for +url+ (from the cache when present, otherwise
    # through the HTTP client) and makes it the current document. Without a
    # URL the current document is reused.
    #
    # NOTE(review): the document is whatever the HTTP client returned; no
    # HTML parsing is done in this method.
    #
    # @param url   [String, nil] address to load, or nil for the current document
    # @param args  extra arguments for the HTTP call; a Hash among them may
    #   carry +:method+ (e.g. +:post+) to pick the client method, default +:get+
    # @param block optional block that receives the loaded document
    # @return the block's result when a block is given, otherwise the document
    # @raise [::StandardError] when no document could be loaded
    def parse(url = nil, *args, &block)
      tmp_doc = url.nil? ? current_doc : (cached_docs[url] || fetch_doc(url, *args))
      ::Kernel.raise ::StandardError, "fail to load doc of specific url #{url}" if tmp_doc.nil?

      @current_doc = tmp_doc
      block ? block.call(tmp_doc) : tmp_doc
    end

    # Forwards unknown messages to the HTTP client first (this is what lets
    # callers authenticate through e.g. +post+), then to the current document.
    # A ::HTTP::Message returned by the client becomes the current document.
    #
    # Receivers are checked with +respond_to?+ instead of rescuing
    # NoMethodError, so an error raised inside the delegated call is not
    # mistaken for a missing method.
    #
    # @raise [::NoMethodError] when neither receiver understands +method+
    def method_missing(method, *args, &block)
      if @client.respond_to?(method)
        result = @client.public_send(method, *args, &block)
        @current_doc = result if ::HTTP::Message === result
        result
      elsif !@current_doc.nil? && @current_doc.respond_to?(method)
        @current_doc.public_send(method, *args, &block)
      else
        ::Kernel.raise ::NoMethodError.new("undefined method '#{method}' for #{inspect}", method)
      end
    end

    # Short identification string; BasicObject provides no default inspect.
    def inspect
      klass = class << self
        self
      end
      '<%s:%d>' % [klass.superclass.name, self.__id__]
    end

    private

    # Performs the HTTP request for +url+ and stores the result in the cache.
    #
    # Hash arguments are duplicated before +:method+ is removed, so the
    # caller's options are never mutated.
    #
    # @return the document returned by the client
    def fetch_doc(url, *args)
      tmp_args = args.map { |arg| ::Hash === arg ? arg.dup : arg }
      has_options = tmp_args.any? { |arg| ::Hash === arg }
      http_method = (has_options && parse_args(:method, tmp_args)) || :get # default to use get method
      # Do not send an options hash that only carried :method.
      tmp_args.reject! { |arg| ::Hash === arg && arg.empty? }
      cached_docs[url] = client.send(http_method, url, *tmp_args)
    end

    # Removes +key+ from the first Hash found in +source+ and returns its value.
    #
    # When +source+ is omitted, the values of the local variables visible to
    # +block+ are searched instead (call it as <tt>parse_args(:key) {}</tt>).
    #
    # @param key    key to extract
    # @param source [Array, nil] candidate arguments
    # @return the removed value, or nil when the hash has no such key
    # @raise [::ArgumentError] when neither +source+ nor +block+ is given, or
    #   when no Hash is found among the candidates
    def parse_args(key, source = nil, &block)
      ::Kernel.raise ::ArgumentError, 'please provide either arguments source or a block with which the local variables bind! (eg parse_args{}) ' unless source || block
      args_arr = source || block.binding.local_variables.map { |i|
        block.binding.local_variable_get i
      }
      # `===` tests "is a Hash"; `==` would compare against the class itself.
      args_hash = args_arr.find { |i| ::Hash === i }
      ::Kernel.raise ::ArgumentError, 'no arguments hash found in current hash, please provide the proper source ' if args_hash.nil?
      args_hash.delete key
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module ArbSpider
2
- VERSION = "0.1.0"
2
+ VERSION = "1.1.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arb_spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - arybin
@@ -55,6 +55,8 @@ files:
55
55
  - bin/console
56
56
  - bin/setup
57
57
  - lib/arb_spider.rb
58
+ - lib/arb_spider/scale_hash.rb
59
+ - lib/arb_spider/spider.rb
58
60
  - lib/arb_spider/version.rb
59
61
  homepage: https://github.com/arybin-cn/arb_spider
60
62
  licenses: []