rhack 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +0,0 @@
1
- === Version 1.0.0
2
-
3
- * Divided by classes and packed as gem
4
-
@@ -55,7 +55,6 @@ test/test_frame.rb
55
55
  ./Rakefile
56
56
  ./Manifest.txt
57
57
  ./CURB-LICENSE
58
- ./License.txt
59
58
  ./README.txt
60
59
  ./Gemfile
61
60
  ./History.txt
data/README.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  ## RHACK
2
2
 
3
3
  Rhack is Ruby Http ACcess Kit -- curl-based web-client for developing web-clients
4
- applications such as webscrappers.
4
+ applications.
5
5
 
6
6
  ## License
7
7
 
data/Rakefile CHANGED
@@ -8,13 +8,13 @@ rescue LoadError
8
8
  end
9
9
 
10
10
  compile_manifest
11
- RHACK_VERSION = '0.2.2'
11
+ RHACK_VERSION = '0.3.0'
12
12
  begin
13
13
  require 'hoe'
14
14
  config = Hoe.spec "rhack" do |h|
15
15
  h.developer("Sergey Baev", "tinbka@gmail.com")
16
16
 
17
- h.description = 'Webscrapping library based on curb gem extension and libxml-ruby (and optionally Johnson and ActiveRecord)'
17
+ h.description = 'Webscraping library based on curb gem extension and libxml-ruby (and optionally Johnson and ActiveRecord)'
18
18
  # h.summary = h.description
19
19
  h.url = 'http://github.com/tinbka'
20
20
 
@@ -212,6 +212,12 @@ static void ruby_curl_easy_free(ruby_curl_easy *rbce) {
212
212
  if (rbce->curl) {
213
213
  curl_easy_cleanup(rbce->curl);
214
214
  }
215
+
216
+ if (rbce->first != NULL) {
217
+ curl_formfree(rbce->first);
218
+ rbce->first = NULL;
219
+ rbce->last = NULL;
220
+ }
215
221
  }
216
222
 
217
223
  void curl_easy_free(ruby_curl_easy *rbce) {
@@ -261,6 +267,8 @@ static void ruby_curl_easy_zero(ruby_curl_easy *rbce) {
261
267
  rbce->enable_cookies = 0;
262
268
  rbce->ignore_content_length = 0;
263
269
  rbce->callback_active = 0;
270
+ rbce->first = NULL;
271
+ rbce->last = NULL;
264
272
  }
265
273
 
266
274
  /*
@@ -118,12 +118,14 @@ module Curl
118
118
  recall
119
119
  execute
120
120
  end
121
+ alias :reload :reset
121
122
 
122
123
  def reset!
123
124
  recall!
124
125
  execute
125
126
  end
126
- module_function :reset!, :reset
127
+ alias :reload! :reset!
128
+ module_function :reset!, :reset, :reload!, :reload
127
129
 
128
130
  def status(raise_e=true)
129
131
  if $CarierThread and (s = $CarierThread.status)
@@ -698,7 +698,7 @@ module HTTPAccessKit
698
698
  form = "[action=#{@loc.path.inspect}]" if form == :self
699
699
  if form.is String
700
700
  form_node = at form
701
- raise XML::Error, "Can't find form by xpath `#{form}` on page #{inspect}" if !form_node
701
+ raise XML::Error, "Can't find form by xpath `#{form}` on page #{inspect}" if !form_node or form_node.name != 'form'
702
702
  else form_node = form
703
703
  end
704
704
  hash = form_node.inputs_all.merge!(hash)
@@ -1,27 +1,38 @@
1
1
  # encoding: utf-8
2
2
  module HTTPAccessKit
3
3
  include RMTools
4
- CONFIG = YAML.load(read(%W(rhack.yml #{File.join(ENV['HOME'], 'rhack.yml')})) || '') || {}
4
+ extend RMTools
5
+
6
+ CONFIG = if ENV["APP_ROOT"]
7
+ YAML.load(read(File.join ENV["APP_ROOT"], 'config/rhack.yml') || '') || {}
8
+ else
9
+ YAML.load(read(%W(config/rhack.yml rhack.yml #{File.join(ENV['HOME'], 'rhack.yml')})) || '') || {}
10
+ end
5
11
 
6
12
  UAS = if File.file?(uas = CONFIG['ua file'] || File.join(ENV['HOME'], 'ua.txt'))
7
13
  IO.read(uas)/"\n"
8
- else ['Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.1.8) Gecko/20100202 MRA 5.6 (build 03278) Firefox/3.5.8 (.NET CLR 3.5.30729)'] end
14
+ else ['Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1'] end
9
15
 
10
16
  L = RMLogger.new(CONFIG.logger || {})
11
17
 
12
- config = CONFIG.db || File.join(ENV['HOME'], 'db.yml')
18
+ db_config = if ENV["APP_ROOT"] and ENV["RAILS_ENV"] and File.file?(dbyml = File.join(ENV["APP_ROOT"], 'config/database.yml'))
19
+ YAML.load(read(dbyml))[ENV["RAILS_ENV"]]
20
+ else
21
+ CONFIG.db || File.join(ENV['HOME'], 'db.yml')
22
+ end
13
23
  begin
14
- DB = ActiveRecord::Base.establish_connection_with config
24
+ DB = ActiveRecord::Base.establish_connection_with db_config
15
25
  rescue LoadError
16
- DB = nil
26
+ DB = nil
17
27
  end
18
-
19
- cache = CONFIG.cache || {}
20
- CacheDir = cache.dir || File.join(File.dirname(__FILE__), 'cache')
21
- CacheTable = (cache.table || :rhack_cache).to_sym
22
- CacheTTL = cache.clean ? eval(cache.clean).b : nil
23
-
24
- RETRY = CONFIG['scout retry'] || {}
28
+ if DB and !(CONFIG.cache and CONFIG.cache.enabled == false)
29
+ cache = CONFIG.cache || {}
30
+ CacheDir = cache.dir || File.join(File.dirname(__FILE__), 'cache')
31
+ CacheTable = (cache.table || :rhack_cache).to_sym
32
+ CacheTTL = cache.clean ? eval(cache.clean).b : nil
33
+ end
34
+
35
+ RETRY = CONFIG['scout retry'] || {}
25
36
 
26
37
  $uas ||= UAS
27
38
  $Carier ||= Curl::Multi.new
@@ -32,5 +43,5 @@ module HTTPAccessKit
32
43
  end
33
44
  end
34
45
 
35
- module Curl; include HTTPAccessKit end
46
+ module Curl; extend HTTPAccessKit end
36
47
  RHACK = HTTPAccessKit
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
  $KCODE = 'UTF-8' if RUBY_VERSION < "1.9"
3
3
  require 'rmtools_dev' unless defined? RMTools
4
+ require 'cgi'
4
5
  here = File.expand_path File.dirname __FILE__
5
6
  require File.join(here, 'curb_core.so')
6
7
  require 'active_record'
@@ -13,4 +14,4 @@ require "#{here}/curl-global"
13
14
  require "#{here}/scout"
14
15
  require "#{here}/frame"
15
16
  require "#{here}/words"
16
- require "#{here}/cache" if RHACK::DB
17
+ require "#{here}/cache" if defined? RHACK::CacheDir
@@ -1,6 +1,8 @@
1
1
  # encoding: utf-8
2
2
  require 'rhack'
3
3
 
4
+ # Вызовы сервисов всегда ждут и возвращают обработанный ответ, если вызваны без блока.
5
+ # В противном случае используется событийная модель и обработанный ответ передаётся в блок.
4
6
  module HTTPAccessKit
5
7
 
6
8
  class Service
@@ -67,14 +67,14 @@ module Curl
67
67
  end
68
68
 
69
69
  @req = {}
70
- @req.url = easy.last_effective_url
71
- @req.header = easy.headers
70
+ @req.url = easy.last_effective_url
71
+ @req.headers = easy.headers
72
72
  if range = easy.headers.Range and range[/(\d+)-(\d+)/]
73
- @req.range = $1.to_i .. $2.to_i
73
+ @req.range = $1.to_i .. $2.to_i
74
74
  end
75
75
  if easy.base and @req.meth = easy.base.last_method and @req.meth == :post
76
- @req.body = easy.post_body
77
- @req.mp = easy.multipart_form_post?
76
+ @req.body = easy.post_body
77
+ @req.mp = easy.multipart_form_post?
78
78
  end
79
79
  end
80
80
 
@@ -90,6 +90,7 @@ module Curl
90
90
  @error ? @error[key_or_index] : @hash[key_or_index.downcase]
91
91
  end
92
92
 
93
+ alias :headers :hash
93
94
  end
94
95
 
95
96
  end
@@ -101,25 +102,20 @@ module HTTPAccessKit
101
102
 
102
103
  def initialize(*args)
103
104
  if args[1].is Scout
104
- domain, str, scout = nil, *args
105
+ str, scout = *args
105
106
  ck = str//;\s*/
106
107
  ck[1..-1].each {|par|
107
108
  a = par/'='
108
- case a[0]
109
+ case a[0].downcase
109
110
  when 'path'; @path = (a[1] == '/') ? // : /^#{Regexp.escape a[1]}/
110
- when 'domain'
111
- if a[1].ord == ?.
112
- domain = a[1][1..-1]
113
- @domain = /(^|\.)#{Regexp.escape(domain)}$/
114
- else
115
- domain = a[1]
116
- @domain = /^#{Regexp.escape(a[1])}$/
117
- end
111
+ when 'domain'; @domain = /(^|\.)#{Regexp.escape a[1].sub(/^./, '')}$/
112
+ when 'expires'; @expires = a[1].to_time
118
113
  end
119
114
  }
120
- @name, @value = ck[0]/'='
121
- L.debug args if !domain
122
- (scout.cookies[domain || scout.uri.host] ||= {})[@name] = self
115
+ @name, @value = ck[0].split('=', 2)
116
+ #@value.gsub!(/^['"]|['"]$/, '')
117
+ L.debug args if !@domain
118
+ (scout.cookies[scout.uri.host] ||= {})[@name] = self
123
119
  else
124
120
  @name, cookie = args[0]
125
121
  case cookie
@@ -134,7 +130,11 @@ module HTTPAccessKit
134
130
  end
135
131
 
136
132
  def use(str, uri)
137
- str << @string if uri.path[@path] and !uri.root || uri.host[@domain]
133
+ if !@expires or @expires > Time.now
134
+ str << @string if uri.path[@path] and !uri.root || uri.host[@domain]
135
+ else
136
+ :expired
137
+ end
138
138
  end
139
139
 
140
140
  def to_s; @value end
@@ -296,7 +296,7 @@ module HTTPAccessKit
296
296
  header = DefaultHeader.dup
297
297
  if @cookieProc
298
298
  cookies = ''
299
- main_cks.each_value {|v| v.use cookies, @uri}
299
+ main_cks.each {|k, v| main_cks.delete k if v.use(cookies, @uri) == :expired}
300
300
  header['Cookie'] = cookies[0..-3]
301
301
  end
302
302
  if @refforge
metadata CHANGED
@@ -5,9 +5,9 @@ version: !ruby/object:Gem::Version
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 2
9
- - 2
10
- version: 0.2.2
8
+ - 3
9
+ - 0
10
+ version: 0.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Sergey Baev
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-08-01 00:00:00 Z
18
+ date: 2012-08-02 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: rmtools
@@ -80,7 +80,7 @@ dependencies:
80
80
  version: "2.12"
81
81
  type: :development
82
82
  version_requirements: *id004
83
- description: Webscrapping library based on curb gem extension and libxml-ruby (and optionally Johnson and ActiveRecord)
83
+ description: Webscraping library based on curb gem extension and libxml-ruby (and optionally Johnson and ActiveRecord)
84
84
  email:
85
85
  - tinbka@gmail.com
86
86
  executables: []
@@ -89,7 +89,6 @@ extensions:
89
89
  - ext/curb/extconf.rb
90
90
  extra_rdoc_files:
91
91
  - ./Manifest.txt
92
- - ./License.txt
93
92
  - ./README.txt
94
93
  - ./History.txt
95
94
  files:
@@ -150,7 +149,6 @@ files:
150
149
  - ./Rakefile
151
150
  - ./Manifest.txt
152
151
  - ./CURB-LICENSE
153
- - ./License.txt
154
152
  - ./README.txt
155
153
  - ./Gemfile
156
154
  - ./History.txt
@@ -1,17 +0,0 @@
1
- Copyright (c) 2010 Anonymous <tinbka@gmail.com>
2
- All rights reserved.
3
-
4
- This work is licensed under the same license as Ruby language.
5
-
6
-
7
- THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
8
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
10
- ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
11
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
12
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
13
- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
14
- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
15
- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
16
- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
17
- SUCH DAMAGE.