ginspider 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/ginspider/analysis.rb +40 -5
- data/lib/ginspider/main.rb +27 -9
- data/lib/ginspider/version.rb +1 -1
- data/lib/ginspider.rb +26 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24178b5f71bc5c092364a9066975e62e453cebd8
|
4
|
+
data.tar.gz: 6bfd4f30b2ac4146f3c99ff056991be04d3a02e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 18be42fc8f3ef9cf67231039228aa42d0a47998396a8b65da21b943b32436ef847b29721d32d11eb2730ca11d6e6b09ecc36420188fcbcd575e5c8420a6c0cea
|
7
|
+
data.tar.gz: a36c97cbb54a850be74b2a569ce2ca55e268cf28339f82a263361ab94a65d2b198d3fd5c4cc26b52b4a8e3b56b1e1320cee775bc0e201ea44bfe8557bf828013
|
data/README.md
CHANGED
data/lib/ginspider/analysis.rb
CHANGED
@@ -9,18 +9,53 @@ module Ginspider
|
|
9
9
|
# return nokogiri response
|
10
10
|
#
|
11
11
|
def self.noko_res(url)
|
12
|
-
res_body =
|
13
|
-
|
14
|
-
return
|
12
|
+
res_body = get_res_body(url)
|
13
|
+
noko_res = get_noko_res(res_body)
|
14
|
+
return noko_res
|
15
|
+
end
|
16
|
+
|
17
|
+
#
|
18
|
+
# return links of response with url and options
|
19
|
+
#
|
20
|
+
def self.res_links(url, options = {})
|
21
|
+
res_body = get_res_body(url)
|
22
|
+
res_links = get_res_links(res_body, options)
|
23
|
+
return res_links
|
15
24
|
end
|
16
25
|
|
17
26
|
private
|
27
|
+
|
28
|
+
#
|
29
|
+
# get the response body
|
30
|
+
#
|
31
|
+
def self.get_res_body(url)
|
32
|
+
@res_body = Http.res_body(url)
|
33
|
+
return @res_body
|
34
|
+
end
|
35
|
+
|
18
36
|
#
|
19
37
|
# generate a nokogiri response
|
20
38
|
#
|
21
39
|
def self.get_noko_res(res_body)
|
22
|
-
noko_res = Nokogiri::HTML(res_body.to_s)
|
23
|
-
return noko_res
|
40
|
+
@noko_res = Nokogiri::HTML(res_body.to_s)
|
41
|
+
return @noko_res
|
42
|
+
end
|
43
|
+
|
44
|
+
#
|
45
|
+
# get all the links from nokogiri object
|
46
|
+
#
|
47
|
+
def self.get_res_links(res_body, options = {})
|
48
|
+
@noko_res = get_noko_res(res_body)
|
49
|
+
@links = @noko_res.css(options["element"])
|
50
|
+
|
51
|
+
links = []
|
52
|
+
i = 0
|
53
|
+
@links.each do |link|
|
54
|
+
links[i] = link["href"].strip
|
55
|
+
i = i + 1
|
56
|
+
end
|
57
|
+
|
58
|
+
return links
|
24
59
|
end
|
25
60
|
|
26
61
|
end
|
data/lib/ginspider/main.rb
CHANGED
@@ -5,21 +5,39 @@ require 'ginspider/analysis'
|
|
5
5
|
|
6
6
|
module Ginspider
|
7
7
|
|
8
|
-
#
|
9
|
-
# the main method to crawl a website
|
10
|
-
#
|
11
|
-
def Ginspider.crawl(url, options = {})
|
12
|
-
Main.crawl(url, options)
|
13
|
-
end
|
14
|
-
|
15
8
|
class Main
|
16
9
|
|
17
10
|
#
|
18
|
-
#
|
11
|
+
# get the basic response
|
19
12
|
#
|
20
13
|
def self.crawl(url, options = {})
|
14
|
+
basic_res = basic_res(url)
|
15
|
+
ap basic_res
|
16
|
+
end
|
17
|
+
|
18
|
+
# define the main crawler
|
19
|
+
#
|
20
|
+
def self.crawl_with_noko(url, options = {})
|
21
21
|
noko_res = Analysis.noko_res(url)
|
22
|
-
ap noko_res
|
22
|
+
ap noko_res
|
23
|
+
end
|
24
|
+
|
25
|
+
#
|
26
|
+
# define the main method to get all links
|
27
|
+
#
|
28
|
+
def self.links_of(url, options = {})
|
29
|
+
res_links = Analysis.res_links(url, options)
|
30
|
+
ap res_links
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
#
|
36
|
+
# get the basic response
|
37
|
+
#
|
38
|
+
def self.basic_res(url, options = {})
|
39
|
+
@basic_res = Http.basic_res(url)
|
40
|
+
return @basic_res
|
23
41
|
end
|
24
42
|
|
25
43
|
end
|
data/lib/ginspider/version.rb
CHANGED
data/lib/ginspider.rb
CHANGED
@@ -1,2 +1,27 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'ginspider/main'
|
2
|
+
require 'ginspider/main'
|
3
|
+
|
4
|
+
module Ginspider
|
5
|
+
|
6
|
+
#
|
7
|
+
# the api method to crawl a website, and get the basic response
|
8
|
+
#
|
9
|
+
def Ginspider.crawl(url, options = {})
|
10
|
+
Main.crawl(url, options)
|
11
|
+
end
|
12
|
+
|
13
|
+
#
|
14
|
+
# the api method to get all links with url and options
|
15
|
+
#
|
16
|
+
def Ginspider.links_of(url, options = {})
|
17
|
+
Main.links_of(url, options)
|
18
|
+
end
|
19
|
+
|
20
|
+
#
|
21
|
+
# the api method to get text with url and options
|
22
|
+
#
|
23
|
+
def Ginspider.text_of(url, options = {})
|
24
|
+
Main.text_of(url, options)
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ginspider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- lancegin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -139,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
version: '0'
|
140
140
|
requirements: []
|
141
141
|
rubyforge_project:
|
142
|
-
rubygems_version: 2.
|
142
|
+
rubygems_version: 2.5.1
|
143
143
|
signing_key:
|
144
144
|
specification_version: 4
|
145
145
|
summary: to analyze some websites
|