rhack 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/.gemtest +0 -0
  2. data/CURB-LICENSE +51 -0
  3. data/Gemfile +4 -0
  4. data/History.txt +4 -0
  5. data/LICENSE +51 -0
  6. data/License.txt +17 -0
  7. data/Manifest.txt +61 -0
  8. data/README.txt +12 -0
  9. data/Rakefile +34 -0
  10. data/ext/curb-original/curb.c +977 -0
  11. data/ext/curb-original/curb.h +52 -0
  12. data/ext/curb-original/curb_config.h +235 -0
  13. data/ext/curb-original/curb_easy.c +3455 -0
  14. data/ext/curb-original/curb_easy.h +90 -0
  15. data/ext/curb-original/curb_errors.c +647 -0
  16. data/ext/curb-original/curb_errors.h +129 -0
  17. data/ext/curb-original/curb_macros.h +159 -0
  18. data/ext/curb-original/curb_multi.c +704 -0
  19. data/ext/curb-original/curb_multi.h +26 -0
  20. data/ext/curb-original/curb_postfield.c +523 -0
  21. data/ext/curb-original/curb_postfield.h +40 -0
  22. data/ext/curb-original/curb_upload.c +80 -0
  23. data/ext/curb-original/curb_upload.h +30 -0
  24. data/ext/curb/Makefile +157 -0
  25. data/ext/curb/curb.c +977 -0
  26. data/ext/curb/curb.h +52 -0
  27. data/ext/curb/curb_config.h +235 -0
  28. data/ext/curb/curb_easy.c +3430 -0
  29. data/ext/curb/curb_easy.h +94 -0
  30. data/ext/curb/curb_errors.c +647 -0
  31. data/ext/curb/curb_errors.h +129 -0
  32. data/ext/curb/curb_macros.h +159 -0
  33. data/ext/curb/curb_multi.c +710 -0
  34. data/ext/curb/curb_multi.h +26 -0
  35. data/ext/curb/curb_postfield.c +523 -0
  36. data/ext/curb/curb_postfield.h +40 -0
  37. data/ext/curb/curb_upload.c +80 -0
  38. data/ext/curb/curb_upload.h +30 -0
  39. data/ext/curb/extconf.rb +399 -0
  40. data/lib/cache.rb +44 -0
  41. data/lib/curl-global.rb +151 -0
  42. data/lib/extensions/browser/env.js +697 -0
  43. data/lib/extensions/browser/jquery.js +7180 -0
  44. data/lib/extensions/browser/xmlsax.js +1564 -0
  45. data/lib/extensions/browser/xmlw3cdom_1.js +1444 -0
  46. data/lib/extensions/browser/xmlw3cdom_2.js +2744 -0
  47. data/lib/extensions/curb.rb +125 -0
  48. data/lib/extensions/declarative.rb +153 -0
  49. data/lib/extensions/johnson.rb +63 -0
  50. data/lib/frame.rb +766 -0
  51. data/lib/init.rb +36 -0
  52. data/lib/rhack.rb +16 -0
  53. data/lib/rhack.yml.template +19 -0
  54. data/lib/rhack/proxy/checker.rb +226 -0
  55. data/lib/rhack/proxy/list.rb +196 -0
  56. data/lib/rhack/services.rb +445 -0
  57. data/lib/rhack_in.rb +2 -0
  58. data/lib/scout.rb +591 -0
  59. data/lib/words.rb +37 -0
  60. data/test/test_frame.rb +107 -0
  61. data/test/test_rhack.rb +5 -0
  62. data/test/test_scout.rb +53 -0
  63. metadata +195 -0
data/lib/words.rb ADDED
@@ -0,0 +1,37 @@
1
+ # encoding: utf-8
2
# Define String::RuDict (word => [form0, form1, form2]) unless a RuDict
# constant is already visible here or under String.
#
# Sources, in order:
# * RHACK::CONFIG['rudict'] - when set, it is treated as a path; the constant
#   is assigned only if that path is an existing regular file whose YAML
#   content passes the `.is Hash` check (NOTE(review): `read` and `is` are
#   helpers defined outside this file - presumably file reading and a type
#   test; confirm).
# * RHACK::DB - when truthy (and no 'rudict' setting exists), the rows of the
#   RuDictionary model are turned into the hash.
unless defined?(RuDict) || defined?(String::RuDict)
  dict_path = RHACK::CONFIG['rudict']

  if dict_path
    if File.file?(dict_path)
      loaded = YAML.load(read(dict_path))
      String::RuDict = loaded if loaded.is(Hash)
    end
  elsif RHACK::DB
    class RuDictionary < ActiveRecord::Base
      declare :rudictionary, :id => false do |t|
        t.string :word
        t.string :form0
        t.string :form1
        t.string :form2
      end

      # One entry per row: the word maps to its three stored forms.
      rows = all.map { |entry| [entry.word, [entry.form0, entry.form1, entry.form2]] }
      String::RuDict = Hash[rows]
    end
  end
end
21
+
22
class String
  # Fallback dictionary so #x works even when no word list was loaded.
  RuDict = {} unless defined?(RuDict)

  # Render the receiver as a counted noun: "<int> <word form>".
  #
  # For a Cyrillic receiver (as reported by #cyr?) the form is looked up in
  # RuDict[self], which is expected to hold three forms:
  #   [0] used with 1, 21, 31, ...          (but not 11)
  #   [1] used with 2-4, 22-24, ...         (but not 12-14)
  #   [2] used with everything else (0, 5-20, 25-30, ...)
  # A Cyrillic word missing from RuDict is returned unchanged.
  #
  # For any other receiver the word is kept as is only for a count of
  # exactly one and is passed through #pluralize otherwise.
  #
  # The sign of +int+ is ignored when choosing the form, but the number is
  # printed as given.
  #
  # @param int [Numeric] the quantity
  # @return [String] the quantity followed by the matching word form
  def x(int)
    count = int.abs

    word =
      if cyr?
        forms = RuDict[self]
        if forms
          last_two = count % 100
          last_one = count % 10
          index =
            if last_two.between?(11, 14)
              2 # the teens always take the "many" form
            elsif last_one == 1
              0
            elsif last_one.between?(2, 4)
              1
            else
              2
            end
          forms[index]
        else
          self
        end
      else
        count == 1 ? self : pluralize
      end

    "#{int} #{word}"
  end
end
@@ -0,0 +1,107 @@
1
# Checks what Frame() builds for two different argument shapes.
class TC_Frame < Test::Unit::TestCase
  include HTTPAccessKit

  def test_init
    # A lone integer: ten entries in #ss and a frame that is not static.
    sized = Frame(10)
    assert_equal(10, sized.ss.size)
    assert(!sized.static)

    # A host plus options: the cookie and timeout must show up on the
    # entries of #ss, and the frame must report itself as static.
    hosted = Frame("example.com", :ck => {"key" => "value"}, :timeout => 10)
    assert_equal(20, hosted.ss.size)
    assert_equal("http://example.com", hosted.loc.root)
    assert_instance_of(Scout, hosted.ss.rand)
    assert_equal('value', hosted.ss.next.main_cks.values.to_s)
    assert_equal(10, hosted.ss.next.timeout)
    assert(hosted.static)

    # A zero second argument is rejected.
    assert_raise(ArgumentError) { Frame("example.com", 0) }
  end
end
19
+
20
# Exercises Frame#interpret_request on a frame created for
# http://site.org/index.html. Each test compares the returned four-element
# array against a literal expectation or checks the raised error class.
class TC_StaticInterpreter < Test::Unit::TestCase
  include HTTPAccessKit

  def setup
    @f = Frame("http://site.org/index.html", 1)
  end

  # Requests that mention example.com must raise TargetError.
  def test_target_fail
    assert_raise(TargetError) { @f.interpret_request("http://example.com") }
    assert_raise(TargetError) { @f.interpret_request({}, "http://example.com") }
    assert_raise(TargetError) do
      @f.interpret_request({}, true, ["http://example.com", "http://site.org/index.html"])
    end
  end

  # Plain GET requests: options only, explicit URL, URL list, relative list.
  def test_simple
    expected = [nil, [:loadGet, "http://site.org/index.html"], nil, {:eval => true, :a => :b}]
    assert_equal(expected, @f.interpret_request(:a => :b))

    expected = [nil, [:loadGet, "http://site.org/"], nil, {:eval => nil}]
    assert_equal(expected, @f.interpret_request("http://site.org/", :eval => nil))

    expected = [
      true,
      nil,
      [[:loadGet, "http://site.org/page_1"], [:loadGet, "http://site.org/page_2"]],
      {:eval => true, :wait => 1, :headers => {'Referer' => 'localhost'}}
    ]
    pages = (1..2).map { |n| "http://site.org/page_#{n}" }
    assert_equal(expected, @f.interpret_request(pages, :wait => 1, :headers => {'Referer' => 'localhost'}))

    expected = [true, nil, [[:loadGet, "http://site.org/page_1"]], {:eval => true}]
    assert_equal(expected, @f.interpret_request(["page_1"]))
  end

  # Bodies and URLs paired one-to-one.
  def test_zip
    one_by_one = [
      true,
      nil,
      [[:loadPost, {:a => :b}, false, "http://site.org/page_3"]],
      {:eval => true}
    ]
    assert_equal(one_by_one, @f.interpret_request([{:a => :b}], false, ["page_3"]))
    assert_equal(one_by_one, @f.interpret_request([{:a => :b}], false, ["page_3"], :zip => 1))

    paired = [
      true,
      nil,
      [
        [:loadPost, {:a => :b}, false, "http://site.org/page_3"],
        [:loadPost, {:c => :d}, false, "http://site.org/page_4"]
      ],
      {:eval => true}
    ]
    assert_equal(
      paired,
      @f.interpret_request([{:a => :b}, {:c => :d}], :def, ["page_3", "page_4"], :zip => true)
    )
  end

  # Argument shapes that must raise ZippingError when :zip is given.
  def test_zip_fail
    assert_raise(ZippingError) do
      @f.interpret_request({:a => :b, :_1 => :_2}, false, "page_3", :zip => 1)
    end
    assert_raise(ZippingError) do
      @f.interpret_request([{:a => :b}], false, "page_3", :zip => 0)
    end
    assert_raise(ZippingError) do
      @f.interpret_request([{:a => :b}, {:_1 => :_2}], false, ["page_3"], :zip => 1)
    end
  end

  # Every body combined with every URL (2 bodies x 2 URLs = 4 requests).
  def test_quad
    two_by_two = [
      true,
      nil,
      [
        [:loadPost, {:a => :b}, false, "http://site.org/page_3"],
        [:loadPost, {:a => :b}, false, "http://site.org/page_4"],
        [:loadPost, {:c => :d}, false, "http://site.org/page_3"],
        [:loadPost, {:c => :d}, false, "http://site.org/page_4"]
      ],
      {:eval => true}
    ]
    bodies = [{:a => :b}, {:c => :d}]
    assert_equal(two_by_two, @f.interpret_request(bodies, :def, ["page_3", "page_4"], :zip => false))

    bodies = [{:a => :b}, {:c => :d}]
    assert_equal(two_by_two, @f.interpret_request(bodies, ["page_3", "page_4"]))
  end

  # Calls that leave some of the arguments out.
  def test_implicit
    expected = [
      true,
      nil,
      [[:loadPost, {:a => :b}, false, "http://site.org/index.html"]],
      {:eval => true}
    ]
    assert_equal(expected, @f.interpret_request([{:a => :b}]))

    expected = [nil, [:loadGet, "http://site.org/index.html"], nil, {:eval => true}]
    assert_equal(expected, @f.interpret_request)

    expected = [nil, [:loadPost, {:a => :b, :_1 => :_2}, false, "http://site.org/"], nil, {:eval => true}]
    assert_equal(expected, @f.interpret_request({:a => :b, :_1 => :_2}, "/"))

    expected = [nil, [:loadPost, {:a => :b, :_1 => :_2}, true, "http://site.org/page_3"], nil, {:eval => true}]
    assert_equal(expected, @f.interpret_request({:a => :b, :_1 => :_2}, "/", "page_3"))

    expected = [
      true,
      nil,
      [[:loadGet, "http://site.org/page_1"], [:loadGet, "http://site.org/page_2"]],
      {:eval => true}
    ]
    assert_equal(expected, @f.interpret_request(['page_1', 'page_2'], true, "/"))
  end

  # Argument combinations that must be refused outright.
  def test_params_fail
    assert_raise(TypeError) { @f.interpret_request("/", []) }
    assert_raise(TypeError) { @f.interpret_request([], "/") }
    assert_raise(TypeError) { @f.interpret_request("/", "") }
    assert_raise(TypeError) { @f.interpret_request([], "/", :a => :b) }
    assert_raise(TypeError) { @f.interpret_request([], true, "/") }

    assert_raise(ArgumentError) { @f.interpret_request({:a => :b}, []) }
    assert_raise(ArgumentError) { @f.interpret_request({:a => :b}, true, []) }
  end
end
89
+
90
# Exercises Frame#interpret_request on a frame created without arguments.
class TC_DynamicInterpreter < Test::Unit::TestCase
  include HTTPAccessKit

  def setup
    @f = Frame()
  end

  # Each of these calls is expected to end in TargetError.
  def test_target_fail
    assert_raise(TargetError) { @f.interpret_request }
    assert_raise(TargetError) { @f.interpret_request([{}], "./") }
    assert_raise(TargetError) { @f.interpret_request("example.com") }
    assert_raise(TargetError) do
      @f.interpret_request({}, true, ["http://example.com", "site.org/index.html"])
    end
  end
end
105
+
106
+
107
+
@@ -0,0 +1,5 @@
1
+ require 'frame'
2
+ require 'test/unit'
3
+ require 'test/unit/ui/console/testrunner'
4
+ require 'test_frame'
5
+ #Test::Unit::UI::Console::TestRunner.run
@@ -0,0 +1,53 @@
1
# Tests for Scout construction and page loading.
#
# Curl.run / Curl.stop bracket every test. test_load requests real external
# hosts, so it presumably needs network access and depends on what those
# hosts answer - NOTE(review): confirm before relying on it in CI.
#
# assert_equal is called as (expected, actual) so that failure messages label
# the two values correctly, matching the other test classes of this package.
class TC_Scout < Test::Unit::TestCase
  include HTTPAccessKit

  def setup
    Curl.run
  end

  def teardown
    Curl.stop
  end

  # A Scout built with an explicit [host, port] proxy.
  def test_init
    # Declared outside the block so the value assigned inside stays visible.
    s = {}
    assert_nothing_raised {
      s = Scout 'http://example.com', ['127.0.0.1', 8000], :def, false
    }
    assert_equal :rand, s.ua
    assert_equal '127.0.0.1:8000', s.proxystr
    assert_nil s.webproxy
  end

  # Statement order matters here: several assertions deliberately run before
  # the following Curl.wait and check the state at that moment.
  def test_load
    res = nil
    s = Scout 'api.rubyonrails.org', :raise=>true
    s.loadGet('/') {|c| res = c.res}
    Curl.wait
    assert_equal 200, res.code
    assert_equal 200, s.http.response_code

    s.loadGet 'http://example.com/aaaaaa'
    # Checked before waiting: the code is still expected to read 200.
    assert_equal 200, res.code
    Curl.wait
    assert_equal 302, res.code
    assert_equal 302, s.http.response_code

    s.loadGet 'https://developer.mozilla.org/en'
    Curl.wait
    s.loadGet('./CSS') {|c| res = nil}
    # Checked before waiting: the block above has not reset res yet.
    assert_equal 200, res.code
    s.cp_on
    Curl.wait
    assert_nil res
    assert_equal "https://developer.mozilla.org/CSS", s.res.req.header.Referer

    s.refforge = false
    s.loadGet {|c| res = c.res.req.url}
    Curl.wait
    assert_equal "https://developer.mozilla.org/en/CSS", res
    assert_nil s.res.req.header.Referer
    assert_not_empty s.main_cks
  end

  # Placeholder: no failure scenarios are covered yet.
  def test_fail
  end
end
metadata ADDED
@@ -0,0 +1,195 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rhack
3
+ version: !ruby/object:Gem::Version
4
+ hash: 19
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 2
10
+ version: 0.2.2
11
+ platform: ruby
12
+ authors:
13
+ - Sergey Baev
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-08-01 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rmtools
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 23
29
+ segments:
30
+ - 1
31
+ - 0
32
+ - 0
33
+ version: 1.0.0
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 49
45
+ segments:
46
+ - 0
47
+ - 8
48
+ - 7
49
+ version: 0.8.7
50
+ type: :runtime
51
+ version_requirements: *id002
52
+ - !ruby/object:Gem::Dependency
53
+ name: libxml-ruby
54
+ prerelease: false
55
+ requirement: &id003 !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ hash: 21
61
+ segments:
62
+ - 1
63
+ - 1
64
+ - 3
65
+ version: 1.1.3
66
+ type: :runtime
67
+ version_requirements: *id003
68
+ - !ruby/object:Gem::Dependency
69
+ name: hoe
70
+ prerelease: false
71
+ requirement: &id004 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ hash: 27
77
+ segments:
78
+ - 2
79
+ - 12
80
+ version: "2.12"
81
+ type: :development
82
+ version_requirements: *id004
83
+ description: Webscrapping library based on curb gem extension and libxml-ruby (and optionally Johnson and ActiveRecord)
84
+ email:
85
+ - tinbka@gmail.com
86
+ executables: []
87
+
88
+ extensions:
89
+ - ext/curb/extconf.rb
90
+ extra_rdoc_files:
91
+ - ./Manifest.txt
92
+ - ./License.txt
93
+ - ./README.txt
94
+ - ./History.txt
95
+ files:
96
+ - ext/curb/curb_errors.c
97
+ - ext/curb/curb_errors.h
98
+ - ext/curb/Makefile
99
+ - ext/curb/curb_macros.h
100
+ - ext/curb/curb_multi.c
101
+ - ext/curb/curb_multi.h
102
+ - ext/curb/curb_upload.c
103
+ - ext/curb/curb_upload.h
104
+ - ext/curb/curb_config.h
105
+ - ext/curb/extconf.rb
106
+ - ext/curb/curb.c
107
+ - ext/curb/curb.h
108
+ - ext/curb/curb_easy.c
109
+ - ext/curb/curb_easy.h
110
+ - ext/curb/curb_postfield.c
111
+ - ext/curb/curb_postfield.h
112
+ - ext/curb-original/curb_errors.c
113
+ - ext/curb-original/curb_errors.h
114
+ - ext/curb-original/curb_macros.h
115
+ - ext/curb-original/curb_multi.c
116
+ - ext/curb-original/curb_multi.h
117
+ - ext/curb-original/curb_upload.c
118
+ - ext/curb-original/curb_upload.h
119
+ - ext/curb-original/curb_config.h
120
+ - ext/curb-original/curb.c
121
+ - ext/curb-original/curb.h
122
+ - ext/curb-original/curb_easy.c
123
+ - ext/curb-original/curb_easy.h
124
+ - ext/curb-original/curb_postfield.c
125
+ - ext/curb-original/curb_postfield.h
126
+ - lib/rhack/proxy/list.rb
127
+ - lib/rhack/proxy/checker.rb
128
+ - lib/rhack/services.rb
129
+ - lib/cache.rb
130
+ - lib/rhack.rb
131
+ - lib/scout.rb
132
+ - lib/rhack.yml.template
133
+ - lib/frame.rb
134
+ - lib/words.rb
135
+ - lib/curl-global.rb
136
+ - lib/extensions/curb.rb
137
+ - lib/extensions/declarative.rb
138
+ - lib/extensions/johnson.rb
139
+ - lib/extensions/browser/env.js
140
+ - lib/extensions/browser/jquery.js
141
+ - lib/extensions/browser/xmlw3cdom_1.js
142
+ - lib/extensions/browser/xmlw3cdom_2.js
143
+ - lib/extensions/browser/xmlsax.js
144
+ - lib/rhack_in.rb
145
+ - lib/init.rb
146
+ - test/test_rhack.rb
147
+ - test/test_scout.rb
148
+ - test/test_frame.rb
149
+ - ./LICENSE
150
+ - ./Rakefile
151
+ - ./Manifest.txt
152
+ - ./CURB-LICENSE
153
+ - ./License.txt
154
+ - ./README.txt
155
+ - ./Gemfile
156
+ - ./History.txt
157
+ - .gemtest
158
+ homepage: http://github.com/tinbka
159
+ licenses: []
160
+
161
+ post_install_message:
162
+ rdoc_options:
163
+ - --main
164
+ - README.txt
165
+ require_paths:
166
+ - lib
167
+ required_ruby_version: !ruby/object:Gem::Requirement
168
+ none: false
169
+ requirements:
170
+ - - ">="
171
+ - !ruby/object:Gem::Version
172
+ hash: 3
173
+ segments:
174
+ - 0
175
+ version: "0"
176
+ required_rubygems_version: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ hash: 3
182
+ segments:
183
+ - 0
184
+ version: "0"
185
+ requirements: []
186
+
187
+ rubyforge_project: rhack
188
+ rubygems_version: 1.8.17
189
+ signing_key:
190
+ specification_version: 3
191
+ summary: ""
192
+ test_files:
193
+ - test/test_rhack.rb
194
+ - test/test_scout.rb
195
+ - test/test_frame.rb