rhack 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/.gemtest +0 -0
  2. data/CURB-LICENSE +51 -0
  3. data/Gemfile +4 -0
  4. data/History.txt +4 -0
  5. data/LICENSE +51 -0
  6. data/License.txt +17 -0
  7. data/Manifest.txt +61 -0
  8. data/README.txt +12 -0
  9. data/Rakefile +34 -0
  10. data/ext/curb-original/curb.c +977 -0
  11. data/ext/curb-original/curb.h +52 -0
  12. data/ext/curb-original/curb_config.h +235 -0
  13. data/ext/curb-original/curb_easy.c +3455 -0
  14. data/ext/curb-original/curb_easy.h +90 -0
  15. data/ext/curb-original/curb_errors.c +647 -0
  16. data/ext/curb-original/curb_errors.h +129 -0
  17. data/ext/curb-original/curb_macros.h +159 -0
  18. data/ext/curb-original/curb_multi.c +704 -0
  19. data/ext/curb-original/curb_multi.h +26 -0
  20. data/ext/curb-original/curb_postfield.c +523 -0
  21. data/ext/curb-original/curb_postfield.h +40 -0
  22. data/ext/curb-original/curb_upload.c +80 -0
  23. data/ext/curb-original/curb_upload.h +30 -0
  24. data/ext/curb/Makefile +157 -0
  25. data/ext/curb/curb.c +977 -0
  26. data/ext/curb/curb.h +52 -0
  27. data/ext/curb/curb_config.h +235 -0
  28. data/ext/curb/curb_easy.c +3430 -0
  29. data/ext/curb/curb_easy.h +94 -0
  30. data/ext/curb/curb_errors.c +647 -0
  31. data/ext/curb/curb_errors.h +129 -0
  32. data/ext/curb/curb_macros.h +159 -0
  33. data/ext/curb/curb_multi.c +710 -0
  34. data/ext/curb/curb_multi.h +26 -0
  35. data/ext/curb/curb_postfield.c +523 -0
  36. data/ext/curb/curb_postfield.h +40 -0
  37. data/ext/curb/curb_upload.c +80 -0
  38. data/ext/curb/curb_upload.h +30 -0
  39. data/ext/curb/extconf.rb +399 -0
  40. data/lib/cache.rb +44 -0
  41. data/lib/curl-global.rb +151 -0
  42. data/lib/extensions/browser/env.js +697 -0
  43. data/lib/extensions/browser/jquery.js +7180 -0
  44. data/lib/extensions/browser/xmlsax.js +1564 -0
  45. data/lib/extensions/browser/xmlw3cdom_1.js +1444 -0
  46. data/lib/extensions/browser/xmlw3cdom_2.js +2744 -0
  47. data/lib/extensions/curb.rb +125 -0
  48. data/lib/extensions/declarative.rb +153 -0
  49. data/lib/extensions/johnson.rb +63 -0
  50. data/lib/frame.rb +766 -0
  51. data/lib/init.rb +36 -0
  52. data/lib/rhack.rb +16 -0
  53. data/lib/rhack.yml.template +19 -0
  54. data/lib/rhack/proxy/checker.rb +226 -0
  55. data/lib/rhack/proxy/list.rb +196 -0
  56. data/lib/rhack/services.rb +445 -0
  57. data/lib/rhack_in.rb +2 -0
  58. data/lib/scout.rb +591 -0
  59. data/lib/words.rb +37 -0
  60. data/test/test_frame.rb +107 -0
  61. data/test/test_rhack.rb +5 -0
  62. data/test/test_scout.rb +53 -0
  63. metadata +195 -0
data/lib/words.rb ADDED
@@ -0,0 +1,37 @@
1
+ # encoding: utf-8
2
# Defines String::RuDict once, unless a RuDict constant is already visible
# at top level or on String. Two sources are tried, in this order:
#   1. the file named by RHACK::CONFIG['rudict'] (used only when it is a
#      regular file whose YAML content is a Hash);
#   2. the `rudictionary` table, when RHACK::DB is truthy.
unless defined?(RuDict) || defined?(String::RuDict)

  dict_path = RHACK::CONFIG['rudict']
  if dict_path
    if File.file?(dict_path)
      # NOTE(review): `read` and `is` are not core Ruby; presumably helpers
      # from rmtools (file read / kind check) -- confirm.
      loaded = YAML.load(read(dict_path))
      String::RuDict = loaded if loaded.is(Hash)
    end
  elsif RHACK::DB
    class RuDictionary < ActiveRecord::Base
      # NOTE(review): `declare` presumably comes from
      # lib/extensions/declarative.rb and describes the table -- confirm.
      declare(:rudictionary, :id => false) do |table|
        [:word, :form0, :form1, :form2].each { |column| table.string column }
      end

      # Map every word to its three forms: word => [form0, form1, form2].
      pairs = all.map { |entry| [entry.word, [entry.form0, entry.form1, entry.form2]] }
      String::RuDict = Hash[pairs]
    end
  end

end
21
+
22
class String
  # word => [form0, form1, form2]; left empty when no dictionary was loaded.
  RuDict = {} unless defined?(RuDict)

  # Formats the receiver as a counted noun: "<int> <word form>".
  #
  # For a Cyrillic receiver (as reported by `cyr?`) the form is looked up in
  # RuDict and chosen by the Russian plural rule:
  #   forms[0] - count ends in 1, except those ending in 11  (1, 21, 101)
  #   forms[1] - count ends in 2..4, except 12..14           (2, 23, 104)
  #   forms[2] - everything else                             (0, 5, 11, 112)
  # A Cyrillic word missing from RuDict is returned unchanged.
  #
  # For any other receiver the word stays singular only when int == 1 and is
  # passed through `pluralize` otherwise.
  #
  # @param int [Integer] the quantity
  # @return [String]
  def x(int)
    word =
      if cyr?
        forms = RuDict[self]
        if forms
          # Only the last two digits matter; abs keeps negatives consistent.
          last_two = int.abs % 100
          last_one = last_two % 10
          index =
            if last_one == 1 && last_two != 11
              0
            elsif last_one >= 2 && last_one <= 4 && !(last_two >= 12 && last_two <= 14)
              1
            else
              2
            end
          forms[index]
        else
          self
        end
      else
        int == 1 ? self : pluralize
      end
    "#{int} #{word}"
  end

end
@@ -0,0 +1,107 @@
1
# Checks what a freshly built Frame exposes: the size of `ss`, the root
# location, per-scout options and the `static` flag.
class TC_Frame < Test::Unit::TestCase
  include HTTPAccessKit

  def test_init
    # Frame built from a number only.
    frame = Frame(10)
    assert_equal(10, frame.ss.size)
    assert(!frame.static)

    # Frame built from a host plus cookie/timeout options.
    frame = Frame("example.com", :ck => { "key" => "value" }, :timeout => 10)
    assert_equal(20, frame.ss.size)
    assert_equal("http://example.com", frame.loc.root)
    assert_instance_of(Scout, frame.ss.rand)
    assert_equal('value', frame.ss.next.main_cks.values.to_s)
    assert_equal(10, frame.ss.next.timeout)
    assert(frame.static)

    # A zero count alongside a host must be rejected.
    assert_raise(ArgumentError) { Frame("example.com", 0) }
  end

end
19
+
20
# Exercises Frame#interpret_request on a frame created with a fixed page URL
# (http://site.org/index.html). Each expectation is the four-element array
# the interpreter returns; the options hash is always its last element.
class TC_StaticInterpreter < Test::Unit::TestCase
  include HTTPAccessKit

  def setup
    @f = Frame("http://site.org/index.html", 1)
  end

  # URLs on a different host than the frame's must raise TargetError.
  def test_target_fail
    assert_raise(TargetError) { @f.interpret_request("http://example.com") }
    assert_raise(TargetError) { @f.interpret_request({}, "http://example.com") }
    assert_raise(TargetError) do
      @f.interpret_request({}, true, ["http://example.com", "http://site.org/index.html"])
    end
  end

  def test_simple
    expected = [nil, [:loadGet, "http://site.org/index.html"], nil, { :eval => true, :a => :b }]
    assert_equal(expected, @f.interpret_request(:a => :b))

    expected = [nil, [:loadGet, "http://site.org/"], nil, { :eval => nil }]
    assert_equal(expected, @f.interpret_request("http://site.org/", :eval => nil))

    expected = [
      true,
      nil,
      [[:loadGet, "http://site.org/page_1"], [:loadGet, "http://site.org/page_2"]],
      { :eval => true, :wait => 1, :headers => { 'Referer' => 'localhost' } }
    ]
    urls = (1..2).map { |n| "http://site.org/page_#{n}" }
    assert_equal(expected, @f.interpret_request(urls, :wait => 1, :headers => { 'Referer' => 'localhost' }))

    expected = [true, nil, [[:loadGet, "http://site.org/page_1"]], { :eval => true }]
    assert_equal(expected, @f.interpret_request(["page_1"]))
  end

  def test_zip
    one_by_one = [true, nil, [[:loadPost, { :a => :b }, false, "http://site.org/page_3"]], { :eval => true }]
    assert_equal(one_by_one, @f.interpret_request([{ :a => :b }], false, ["page_3"]))
    assert_equal(one_by_one, @f.interpret_request([{ :a => :b }], false, ["page_3"], :zip => 1))

    pairwise = [
      true,
      nil,
      [
        [:loadPost, { :a => :b }, false, "http://site.org/page_3"],
        [:loadPost, { :c => :d }, false, "http://site.org/page_4"]
      ],
      { :eval => true }
    ]
    assert_equal(pairwise, @f.interpret_request([{ :a => :b }, { :c => :d }], :def, ["page_3", "page_4"], :zip => true))
  end

  def test_zip_fail
    assert_raise(ZippingError) { @f.interpret_request({ :a => :b, :_1 => :_2 }, false, "page_3", :zip => 1) }
    assert_raise(ZippingError) { @f.interpret_request([{ :a => :b }], false, "page_3", :zip => 0) }
    assert_raise(ZippingError) { @f.interpret_request([{ :a => :b }, { :_1 => :_2 }], false, ["page_3"], :zip => 1) }
  end

  # Without zipping, every body is combined with every URL.
  def test_quad
    two_by_two = [
      true,
      nil,
      [
        [:loadPost, { :a => :b }, false, "http://site.org/page_3"],
        [:loadPost, { :a => :b }, false, "http://site.org/page_4"],
        [:loadPost, { :c => :d }, false, "http://site.org/page_3"],
        [:loadPost, { :c => :d }, false, "http://site.org/page_4"]
      ],
      { :eval => true }
    ]
    assert_equal(two_by_two, @f.interpret_request([{ :a => :b }, { :c => :d }], :def, ["page_3", "page_4"], :zip => false))
    assert_equal(two_by_two, @f.interpret_request([{ :a => :b }, { :c => :d }], ["page_3", "page_4"]))
  end

  def test_implicit
    expected = [true, nil, [[:loadPost, { :a => :b }, false, "http://site.org/index.html"]], { :eval => true }]
    assert_equal(expected, @f.interpret_request([:a => :b]))

    expected = [nil, [:loadGet, "http://site.org/index.html"], nil, { :eval => true }]
    assert_equal(expected, @f.interpret_request)

    expected = [nil, [:loadPost, { :a => :b, :_1 => :_2 }, false, "http://site.org/"], nil, { :eval => true }]
    assert_equal(expected, @f.interpret_request({ :a => :b, :_1 => :_2 }, "/"))

    expected = [nil, [:loadPost, { :a => :b, :_1 => :_2 }, true, "http://site.org/page_3"], nil, { :eval => true }]
    assert_equal(expected, @f.interpret_request({ :a => :b, :_1 => :_2 }, "/", "page_3"))

    expected = [
      true,
      nil,
      [[:loadGet, "http://site.org/page_1"], [:loadGet, "http://site.org/page_2"]],
      { :eval => true }
    ]
    assert_equal(expected, @f.interpret_request(['page_1', 'page_2'], true, "/"))
  end

  def test_params_fail
    assert_raise(TypeError) { @f.interpret_request("/", []) }
    assert_raise(TypeError) { @f.interpret_request([], "/") }
    assert_raise(TypeError) { @f.interpret_request("/", "") }
    assert_raise(TypeError) { @f.interpret_request([], "/", :a => :b) }
    assert_raise(TypeError) { @f.interpret_request([], true, "/") }
    assert_raise(ArgumentError) { @f.interpret_request({ :a => :b }, []) }
    assert_raise(ArgumentError) { @f.interpret_request({ :a => :b }, true, []) }
  end

end
89
+
90
# Exercises Frame#interpret_request on a frame created with no arguments:
# every request below is expected to raise TargetError.
class TC_DynamicInterpreter < Test::Unit::TestCase
  include HTTPAccessKit

  def setup
    @f = Frame()
  end

  def test_target_fail
    assert_raise(TargetError) { @f.interpret_request }
    assert_raise(TargetError) { @f.interpret_request([{}], "./") }
    assert_raise(TargetError) { @f.interpret_request("example.com") }
    assert_raise(TargetError) do
      @f.interpret_request({}, true, ["http://example.com", "site.org/index.html"])
    end
  end

end
105
+
106
+
107
+
@@ -0,0 +1,5 @@
1
+ require 'frame'
2
+ require 'test/unit'
3
+ require 'test/unit/ui/console/testrunner'
4
+ require 'test_frame'
5
+ #Test::Unit::UI::Console::TestRunner.run
@@ -0,0 +1,53 @@
1
# Tests for Scout construction and loading. `test_load` issues requests to
# live hosts (api.rubyonrails.org, example.com, developer.mozilla.org), so it
# depends on network access and on those sites' current responses.
#
# All assert_equal calls use the test-unit order (expected, actual) so that
# failure messages label the two values correctly.
class TC_Scout < Test::Unit::TestCase
  include HTTPAccessKit

  def setup
    Curl.run
  end

  def teardown
    Curl.stop
  end

  def test_init
    s = nil
    assert_nothing_raised {
      s = Scout 'http://example.com', ['127.0.0.1', 8000], :def, false
    }
    assert_equal :rand, s.ua
    assert_equal '127.0.0.1:8000', s.proxystr
    assert_nil s.webproxy
  end

  def test_load
    res = nil
    s = Scout 'api.rubyonrails.org', :raise=>true
    s.loadGet('/') {|c| res = c.res}
    Curl.wait
    assert_equal 200, res.code
    assert_equal 200, s.http.response_code

    # Checked before Curl.wait on purpose: presumably the request is still
    # pending, so `res` must still show the previous response.
    s.loadGet 'http://example.com/aaaaaa'
    assert_equal 200, res.code
    Curl.wait
    assert_equal 302, res.code
    assert_equal 302, s.http.response_code

    s.loadGet 'https://developer.mozilla.org/en'
    Curl.wait
    # The callback clears `res`; it must not have run before Curl.wait.
    s.loadGet('./CSS') {|c| res = nil}
    assert_equal 200, res.code
    s.cp_on
    Curl.wait
    assert_nil res
    assert_equal "https://developer.mozilla.org/CSS", s.res.req.header.Referer

    # With refforge disabled no Referer header is expected on the request.
    s.refforge = false
    s.loadGet {|c| res = c.res.req.url}
    Curl.wait
    assert_equal "https://developer.mozilla.org/en/CSS", res
    assert_nil s.res.req.header.Referer
    assert_not_empty s.main_cks
  end

  # Placeholder: no failure scenarios are covered yet.
  def test_fail
  end

end
metadata ADDED
@@ -0,0 +1,195 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rhack
3
+ version: !ruby/object:Gem::Version
4
+ hash: 19
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 2
10
+ version: 0.2.2
11
+ platform: ruby
12
+ authors:
13
+ - Sergey Baev
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-08-01 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rmtools
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 23
29
+ segments:
30
+ - 1
31
+ - 0
32
+ - 0
33
+ version: 1.0.0
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 49
45
+ segments:
46
+ - 0
47
+ - 8
48
+ - 7
49
+ version: 0.8.7
50
+ type: :runtime
51
+ version_requirements: *id002
52
+ - !ruby/object:Gem::Dependency
53
+ name: libxml-ruby
54
+ prerelease: false
55
+ requirement: &id003 !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ hash: 21
61
+ segments:
62
+ - 1
63
+ - 1
64
+ - 3
65
+ version: 1.1.3
66
+ type: :runtime
67
+ version_requirements: *id003
68
+ - !ruby/object:Gem::Dependency
69
+ name: hoe
70
+ prerelease: false
71
+ requirement: &id004 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ hash: 27
77
+ segments:
78
+ - 2
79
+ - 12
80
+ version: "2.12"
81
+ type: :development
82
+ version_requirements: *id004
83
+ description: Web scraping library based on curb gem extension and libxml-ruby (and optionally Johnson and ActiveRecord)
84
+ email:
85
+ - tinbka@gmail.com
86
+ executables: []
87
+
88
+ extensions:
89
+ - ext/curb/extconf.rb
90
+ extra_rdoc_files:
91
+ - ./Manifest.txt
92
+ - ./License.txt
93
+ - ./README.txt
94
+ - ./History.txt
95
+ files:
96
+ - ext/curb/curb_errors.c
97
+ - ext/curb/curb_errors.h
98
+ - ext/curb/Makefile
99
+ - ext/curb/curb_macros.h
100
+ - ext/curb/curb_multi.c
101
+ - ext/curb/curb_multi.h
102
+ - ext/curb/curb_upload.c
103
+ - ext/curb/curb_upload.h
104
+ - ext/curb/curb_config.h
105
+ - ext/curb/extconf.rb
106
+ - ext/curb/curb.c
107
+ - ext/curb/curb.h
108
+ - ext/curb/curb_easy.c
109
+ - ext/curb/curb_easy.h
110
+ - ext/curb/curb_postfield.c
111
+ - ext/curb/curb_postfield.h
112
+ - ext/curb-original/curb_errors.c
113
+ - ext/curb-original/curb_errors.h
114
+ - ext/curb-original/curb_macros.h
115
+ - ext/curb-original/curb_multi.c
116
+ - ext/curb-original/curb_multi.h
117
+ - ext/curb-original/curb_upload.c
118
+ - ext/curb-original/curb_upload.h
119
+ - ext/curb-original/curb_config.h
120
+ - ext/curb-original/curb.c
121
+ - ext/curb-original/curb.h
122
+ - ext/curb-original/curb_easy.c
123
+ - ext/curb-original/curb_easy.h
124
+ - ext/curb-original/curb_postfield.c
125
+ - ext/curb-original/curb_postfield.h
126
+ - lib/rhack/proxy/list.rb
127
+ - lib/rhack/proxy/checker.rb
128
+ - lib/rhack/services.rb
129
+ - lib/cache.rb
130
+ - lib/rhack.rb
131
+ - lib/scout.rb
132
+ - lib/rhack.yml.template
133
+ - lib/frame.rb
134
+ - lib/words.rb
135
+ - lib/curl-global.rb
136
+ - lib/extensions/curb.rb
137
+ - lib/extensions/declarative.rb
138
+ - lib/extensions/johnson.rb
139
+ - lib/extensions/browser/env.js
140
+ - lib/extensions/browser/jquery.js
141
+ - lib/extensions/browser/xmlw3cdom_1.js
142
+ - lib/extensions/browser/xmlw3cdom_2.js
143
+ - lib/extensions/browser/xmlsax.js
144
+ - lib/rhack_in.rb
145
+ - lib/init.rb
146
+ - test/test_rhack.rb
147
+ - test/test_scout.rb
148
+ - test/test_frame.rb
149
+ - ./LICENSE
150
+ - ./Rakefile
151
+ - ./Manifest.txt
152
+ - ./CURB-LICENSE
153
+ - ./License.txt
154
+ - ./README.txt
155
+ - ./Gemfile
156
+ - ./History.txt
157
+ - .gemtest
158
+ homepage: http://github.com/tinbka
159
+ licenses: []
160
+
161
+ post_install_message:
162
+ rdoc_options:
163
+ - --main
164
+ - README.txt
165
+ require_paths:
166
+ - lib
167
+ required_ruby_version: !ruby/object:Gem::Requirement
168
+ none: false
169
+ requirements:
170
+ - - ">="
171
+ - !ruby/object:Gem::Version
172
+ hash: 3
173
+ segments:
174
+ - 0
175
+ version: "0"
176
+ required_rubygems_version: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ hash: 3
182
+ segments:
183
+ - 0
184
+ version: "0"
185
+ requirements: []
186
+
187
+ rubyforge_project: rhack
188
+ rubygems_version: 1.8.17
189
+ signing_key:
190
+ specification_version: 3
191
+ summary: ""
192
+ test_files:
193
+ - test/test_rhack.rb
194
+ - test/test_scout.rb
195
+ - test/test_frame.rb