crawler_rocks 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: fb4377dbe2052791945d3f72cace73033a3a200d
4
+ data.tar.gz: d75b3b56b13643c5c8b1692c537a83d1c8eb7717
5
+ SHA512:
6
+ metadata.gz: 0d2e67ddde00e226486259ae630d1c6effe76a4cf9a62ae992b382898980c1964130a729bccdfacd4fbf7dc6b717cb242945f74a58be0379fd05cef57af6871f
7
+ data.tar.gz: b8ea4af35f695781215f166f66810f09fe67041d5a0f16d0763a8ebae70fed89ff96327542a689040674e8d96760ec62d093e7dfcac9cf0ce429cebd78395c42
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in crawler_rocks.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Yukai Huang
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # CrawlerRocks
2
+
3
+ A crawler toolkit: a small helper DSL, built on Nokogiri and curb, for visiting pages and submitting HTML forms.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'crawler_rocks'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install crawler_rocks
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/crawler_rocks/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,27 @@
# coding: utf-8
# Gem specification for crawler_rocks.
#
# All paths are resolved relative to this file rather than the current
# working directory, so the gem can be built or loaded from anywhere.
root = File.expand_path('..', __FILE__)
lib = File.join(root, 'lib')
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'crawler_rocks/version'

Gem::Specification.new do |spec|
  spec.name          = "crawler_rocks"
  spec.version       = CrawlerRocks::VERSION
  spec.authors       = ["Yukai Huang"]
  spec.email         = ["yukaihuang1993@hotmail.com"]
  spec.summary       = %q{a crawler toolkit}
  spec.description   = %q{a crawler toolkit}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Run git from the gemspec's own directory: `git ls-files` lists files
  # relative to the current directory, which is not necessarily the gem root
  # when the spec is evaluated by another tool.
  spec.files         = Dir.chdir(root) { `git ls-files -z`.split("\x0") }
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"

  spec.add_dependency "nokogiri", '~> 1.6', '>= 1.6.6.2'
  spec.add_dependency "rest-client", '~> 1.8', '>= 1.8.0'
  spec.add_dependency "curb", '~> 0.8', '>= 0.8.8'
end
@@ -0,0 +1,3 @@
module CrawlerRocks
  # Version of the crawler_rocks gem. Frozen so that no caller can mutate
  # the shared constant string in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,60 @@
1
+ require "crawler_rocks/version"
2
+ require 'nokogiri'
3
+ require 'curb'
4
+
require 'uri'

module CrawlerRocks
  # Mixin that gives a class a tiny browsing DSL: fetch a page, read its
  # hidden form fields, and submit a form on it.
  #
  # State kept on the including object:
  #   @doc          parsed document of the last response
  #   @cookies      "name=value; ..." string taken from the first response
  #                 that set cookies; sent back on every POST
  #   @current_url  URL of the last request
  #
  # The module defines +initialize+; an including class that defines its own
  # constructor has to call +super+ for the encoding option to be set.
  module HelperDSL
    attr_reader :current_url

    # @param opts [Hash] options; keys may be strings or symbols.
    #   :encoding - encoding label applied to response bodies (default 'utf-8').
    def initialize(opts = {})
      opts = Hash[opts.map { |key, value| [key.to_sym, value] }]
      @encoding = opts.fetch(:encoding, 'utf-8')
    end

    # Fetches +url+ with a GET request and makes it the current page.
    #
    # @param url [String]
    # @return [String] the visited URL
    def visit(url)
      handle_response(Curl.get(url))
      @current_url = url
    end

    # Submits a form of the current page with a POST request.
    #
    # With a +submit_name+, the form is the one containing the submit input
    # whose +value+ equals that name, and the input's own name/value pair is
    # posted as well. Without a name, the first form of the page is used.
    # Every hidden input of the page and +form_data+ are merged in, with
    # +form_data+ taking precedence.
    #
    # @param submit_name [String, nil] value attribute of the submit input
    # @param form_data [Hash] extra fields to post
    # @raise [ArgumentError] when no matching form is found on the page
    # @return [String] the URL the form was posted to
    def submit(submit_name = nil, form_data = {})
      if submit_name.nil?
        button_fields = {}
        form = @doc.at_css('form')
      else
        buttons = @doc.css("input[type=\"submit\"][value=\"#{submit_name}\"]")
        button_fields = named_values(buttons)
        form = buttons.first && buttons.first.at_xpath('ancestor::form[1]')
      end
      raise ArgumentError, "no form found for submit button #{submit_name.inspect}" if form.nil?

      post_hash = button_fields.merge(get_view_state).merge(form_data)
      post resolve_form_url(form['action']), post_hash
    end

    # Sends a POST request, replaying the stored cookies, and makes the
    # response the current page.
    #
    # @param url [String]
    # @param opt [Hash] form fields to post
    # @return [String] the posted URL
    def post(url, opt = {})
      # Assigning the result keeps the block attached to Curl.post, where it
      # configures the request before it is performed.
      response = Curl.post(url, opt) do |curl|
        curl.headers['Cookie'] = @cookies if @cookies
      end
      handle_response(response)

      @current_url = url
    end

    # @return [Hash] name => value of every named hidden input on the page
    def get_view_state
      named_values(@doc.css('input[type="hidden"]'))
    end

    private

    # Parses the response body into @doc and remembers the first cookies seen.
    def handle_response(response)
      # dup: the body may be a frozen string, and force_encoding mutates.
      body = response.body_str.to_s.dup.force_encoding(@encoding)
      @doc = Nokogiri::HTML(body)
      @cookies ||= extract_cookies(response.header_str)
    end

    # Builds a Cookie header value from the Set-Cookie lines of a raw header
    # block; returns nil when the response set no cookies.
    def extract_cookies(header_str)
      pairs = header_str.to_s.scan(/^Set-Cookie:\s*([^;\r\n]+)/i).flatten
      pairs.empty? ? nil : pairs.join('; ')
    end

    # Resolves a form action against the current URL. An empty or missing
    # action targets the current URL itself.
    def resolve_form_url(action)
      action = action.to_s.strip
      return @current_url if action.empty?

      URI.join(@current_url, action).to_s
    end

    # Collects name => value pairs from input nodes, skipping unnamed ones.
    def named_values(nodes)
      nodes.each_with_object({}) do |node, fields|
        name = node[:name]
        fields[name] = node[:value] unless name.nil?
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crawler_rocks
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Yukai Huang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: 1.6.6.2
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - "~>"
56
+ - !ruby/object:Gem::Version
57
+ version: '1.6'
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 1.6.6.2
61
+ - !ruby/object:Gem::Dependency
62
+ name: rest-client
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.8'
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: 1.8.0
71
+ type: :runtime
72
+ prerelease: false
73
+ version_requirements: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - "~>"
76
+ - !ruby/object:Gem::Version
77
+ version: '1.8'
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: 1.8.0
81
+ - !ruby/object:Gem::Dependency
82
+ name: curb
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - "~>"
86
+ - !ruby/object:Gem::Version
87
+ version: '0.8'
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: 0.8.8
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '0.8'
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: 0.8.8
101
+ description: a crawler toolkit
102
+ email:
103
+ - yukaihuang1993@hotmail.com
104
+ executables: []
105
+ extensions: []
106
+ extra_rdoc_files: []
107
+ files:
108
+ - ".gitignore"
109
+ - Gemfile
110
+ - LICENSE.txt
111
+ - README.md
112
+ - Rakefile
113
+ - crawler_rocks.gemspec
114
+ - lib/crawler_rocks.rb
115
+ - lib/crawler_rocks/version.rb
116
+ homepage: ''
117
+ licenses:
118
+ - MIT
119
+ metadata: {}
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 2.4.3
137
+ signing_key:
138
+ specification_version: 4
139
+ summary: a crawler toolkit
140
+ test_files: []